twoafternoon.trtlive, twoafternoon.any.trtlive

reading DEG

twoafternoon.trtlive.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","twoafternoon.trtlive.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_short_description = col_character(),
##   perc_ID = col_double()
## )
twoafternoon.any.trtlive.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","twoafternoon.any.trtlive.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC.soil_trtSBC_OLD = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day03 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day04 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day06 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day08 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day10 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day13 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day14 = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_short_description = col_character(),
##   perc_ID = col_double()
## )

format data

# select genes with higher CV 
## classic way
co.var.df <- function(x) ( 100*apply(x,1,sd)/rowMeans(x) )
cpm.timecourse.v3.0$cv<-co.var.df(cpm.timecourse.v3.0[,-1])
# tidyverse way (no working)
#cpm.timecourse.v3.0 %>% slice(1:100) %>% select(-1) %>% group_by(%>% mutate(cv=map(.,co.var.df ))
a<-hist(cpm.timecourse.v3.0$cv)

a
## $breaks
##  [1]   0  50 100 150 200 250 300 350 400 450 500 550 600 650 700
## 
## $counts
##  [1] 19889  5976   989   270    87    45    24    12     7     4     2     1
## [13]     1     1
## 
## $density
##  [1] 1.456643e-02 4.376739e-03 7.243299e-04 1.977443e-04 6.371759e-05
##  [6] 3.295738e-05 1.757727e-05 8.788633e-06 5.126703e-06 2.929544e-06
## [11] 1.464772e-06 7.323861e-07 7.323861e-07 7.323861e-07
## 
## $mids
##  [1]  25  75 125 175 225 275 325 375 425 475 525 575 625 675
## 
## $xname
## [1] "cpm.timecourse.v3.0$cv"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# there are genes with extream value
cpm.timecourse.v3.0 %>% filter(cv>600)
# Check expression pattern
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = cpm.timecourse.v3.0 %>% dplyr::filter(cv>450) %>% dplyr::slice(1:20)) ->p
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
p
## Error in eval(expr, envir, enclos): object 'p' not found
ggsave(filename="../output/highCV.absvalue.genes.expression.png",width=11,height=8) # should I remove them????
# 
sum(as.integer(cpm.timecourse.v3.0$cv>30))/dim(cpm.timecourse.v3.0)[1] # [1] 0.5207265
## [1] 0.5207265
sum(as.integer(cpm.timecourse.v3.0$cv>40))/dim(cpm.timecourse.v3.0)[1] # [1] 0.3725282. Larger CV than SAS timecourse data ()??? Due to non log absolute expression value.
## [1] 0.3725282
# cf. sum(as.integer(SAS.expression.vst.s.kazu$cv>4.5))/dim(SAS.expression.vst.s.kazu)[1] #[1] 0.2300789

use lon transformed data

cpm.timecourse.v3.0.log$cv<-co.var.df(cpm.timecourse.v3.0.log[,-1])
b<-hist(cpm.timecourse.v3.0.log$cv)

b
## $breaks
##  [1] -300000 -280000 -260000 -240000 -220000 -200000 -180000 -160000 -140000
## [10] -120000 -100000  -80000  -60000  -40000  -20000       0   20000   40000
## [19]   60000   80000  100000  120000  140000
## 
## $counts
##  [1]     1     0     0     1     1     0     0     0     1     0     1     1
## [13]     1     7  1648 25634     7     1     1     2     0     1
## 
## $density
##  [1] 1.830965e-09 0.000000e+00 0.000000e+00 1.830965e-09 1.830965e-09
##  [6] 0.000000e+00 0.000000e+00 0.000000e+00 1.830965e-09 0.000000e+00
## [11] 1.830965e-09 1.830965e-09 1.830965e-09 1.281676e-08 3.017431e-06
## [16] 4.693496e-05 1.281676e-08 1.830965e-09 1.830965e-09 3.661931e-09
## [21] 0.000000e+00 1.830965e-09
## 
## $mids
##  [1] -290000 -270000 -250000 -230000 -210000 -190000 -170000 -150000 -130000
## [10] -110000  -90000  -70000  -50000  -30000  -10000   10000   30000   50000
## [19]   70000   90000  110000  130000
## 
## $xname
## [1] "cpm.timecourse.v3.0.log$cv"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# use largeCV 
cpm.timecourse.v3.0.log.largeCV<-cpm.timecourse.v3.0.log[cpm.timecourse.v3.0[cpm.timecourse.v3.0$cv>40,"transcript_ID"],] 
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262   289  > [1] 10173   290  (02/01/2020) (cf. SAS.expression.vst.s.kazu.largeCV is 7025 288)
## [1] 10173   290
c<-hist(cpm.timecourse.v3.0.log.largeCV$cv)

c
## $breaks
##  [1] -30000 -25000 -20000 -15000 -10000  -5000      0   5000  10000  15000
## [11]  20000  25000  30000  35000
## 
## $counts
##  [1]    1    0    3    2    6  348 5692    6    4    1    1    0    1
## 
## $density
##  [1] 3.297609e-08 0.000000e+00 9.892828e-08 6.595218e-08 1.978566e-07
##  [6] 1.147568e-05 1.876999e-04 1.978566e-07 1.319044e-07 3.297609e-08
## [11] 3.297609e-08 0.000000e+00 3.297609e-08
## 
## $mids
##  [1] -27500 -22500 -17500 -12500  -7500  -2500   2500   7500  12500  17500
## [11]  22500  27500  32500
## 
## $xname
## [1] "cpm.timecourse.v3.0.log.largeCV$cv"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
###########
#save(cpm.timecourse.v3.0.log.largeCV,file=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
write_csv(cpm.timecourse.v3.0.log.largeCV,path=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))

WGCNA

co-expression analysis by WGCNA

# The following setting is important, do not omit.
library(WGCNA) # errors in installing WGCNA on my computer at impute package installation (Jan 27, 2020). Use Whitney
options(stringsAsFactors = FALSE)
if(Sys.info()["nodename"]=="whitney") {
  enableWGCNAThreads(10) # in Whitney (Maloof lab server) 
} else if (Sys.info()["nodename"]=="Kazu-MBP.plb.ucdavis.edu") {
      enableWGCNAThreads(2) # in my computer
  }

run this in Whitney

#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))
# for some reasons in Whitney library columns were read ad character. Needs to fix it.
#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"),
#                                          col_types=list(col_character(),col_double())) # error
cpm.timecourse.v3.0.log.largeCV<-read.csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz")) # using classic read.csv in Whitney

#load(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
# 
 datExpr <-t(cpm.timecourse.v3.0.log.largeCV[,-1])
  # Choose a set of soft-thresholding powers
  powers = c(c(1:9), seq(from = 2, to=20, by=10))
  sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)  
  # Plot the results:
  #sizeGrWindow(9, 5)
  pdf("../output/largeCV.softthresholding.pdf",width=10,height=8)
  par(mfrow = c(1,2));
  cex1 = 0.9;
  # Scale-free topology fit index as a function of the soft-thresholding power
  plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
       xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
       main = paste("Scale independence"));
  text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
       labels=powers,cex=cex1,col="red");
  # this line corresponds to using an R^2 cut-off of h
  abline(h=0.90,col="red")
  # Mean connectivity as a function of the soft-thresholding power
  plot(sft$fitIndices[,1], sft$fitIndices[,5],
       xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
       main = paste("Mean connectivity"))
  text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
  dev.off()
  # 
  net = blockwiseModules(datExpr, power = 9,
                         TOMType = "unsigned", minModuleSize = 20,
                         reassignThreshold = 0, mergeCutHeight = 0.25,
                         numericLabels = TRUE, pamRespectsDendro = FALSE,
                         saveTOMs = TRUE,
                         saveTOMFileBase = "cpm.timecourse.v3.0.log.largeCV.TOM",
                         verbose = 3)
  save(net,file="../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")  
  # open a graphics window
  pdf(file="../output/largeCV.dendrogram.pdf",width=10,height=8)
  # Convert labels to colors for plotting
  mergedColors = labels2colors(net$colors)
  # Plot the dendrogram and the module colors underneath
  plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                      "Module colors",
                      dendroLabels = FALSE, hang = 0.03,
                      addGuide = TRUE, guideHang = 0.05)
  dev.off()
  # save parameters  
  moduleLabels = net$colors
  moduleColors = labels2colors(net$colors)
  MEs = net$MEs
  geneTree = net$dendrograms[[1]]
  save(MEs, moduleLabels, moduleColors, geneTree,file ="../output/all.largeCV.RData")

back to my vomputer to look WGCNA results

cpm.timecourse.v3.0.log.largeCV<-read.csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz")) 
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262   289 -> [1] 10173   290 (Feb 01, 2020)
## [1] 10173   290
load("../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")  
load("../output/all.largeCV.RData")
# how many modules?
  table(net$colors);length(table(net$colors)) # 7 modules
## 
##    0    1    2    3    4    5    6 
## 4968 4723  174  126   79   72   31
## [1] 7

adding gene name, annotations

cpm.timecourse.v3.0.log.largeCV.modules <- tibble(
  transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,
  modules=moduleColors
)
#cpm.timecourse.v3.0.log.largeCV.modules.list<-list(transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,modules=moduleColors)
## prep
# annotation file for v3.0annotation
Br.v3.0.At.BLAST <- read_csv(file.path("..","Annotation_copy","output","v3.0annotation","Brapa_v3.0_annotated.csv")) 
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   name = col_character(),
##   chrom = col_character(),
##   subject = col_character(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_full_name = col_character(),
##   At_gene_model_type = col_character(),
##   At_short_description = col_character(),
##   At_Curator_summary = col_character(),
##   At_Computational_description = col_character()
## )
## See spec(...) for full column specifications.
# This annotation is redundant with name (Br grene). Eg 
Br.v3.0.At.BLAST %>% filter(name=="BraA01g040570.3C")
# reduce the redundancy (112418)
Br.v3.0anno.At.BLAST.highscore <- Br.v3.0.At.BLAST %>% group_by(name) %>% arrange(desc(score)) %>% dplyr::slice(1)
# function for adding annotation
## get object name https://stackoverflow.com/questions/14577412/how-to-convert-variable-object-name-into-string
myfunc <- function(v1) {
  deparse(substitute(v1))
}
myfunc(foo)
## [1] "foo"
# adding annotation and write_csv adding ".v3.0anno.csv" to the object name.
addAnno<-function(DGE) {temp<-left_join(DGE %>% rownames_to_column(var="genes"),Br.v3.0anno.At.BLAST.highscore,by=c(genes="name")) %>%  dplyr::select(genes,names(DGE),AGI, At_symbol, At_short_description, perc_ID); print(deparse(substitute(DGE)));
write_csv(temp, path=file.path("..","output",paste(deparse(substitute(DGE)),".v3.0anno.csv",sep="")));
return(temp)} 

asign moduleColor to corresponding Br genes

  #Br.v3.0anno.At.BLAST.highscore.list<-list()
  Bra.v3.0_cdna.list<-list()
  #names(Bra.v3.0_cdna.list)<-names(Bra.v3.0_cdna)
names(Bra.v3.0_cdna) %in% cpm.timecourse.v3.0.log.largeCV.modules$transcript_ID

  for(i in 1:length(Bra.v3.0_cdna)) {
    print(paste("i is ",i))
    print(cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==as_vector(names(Bra.v3.0_cdna))[i]) %>% dplyr::select(transcript_ID))
        print(cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==as_vector(names(Bra.v3.0_cdna))[i]) %>% dplyr::select(transcript_ID) %>% dim())
        print(cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==as_vector(names(Bra.v3.0_cdna))[i]) %>% dplyr::select(transcript_ID) %>% dim() ==c(1,1))
temp<-cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==names(Bra.v3.0_cdna)[i]) %>% dplyr::select(transcript_ID)
print(dim(temp)[1]==0)
    if(dim(temp)[1]==0) next else
    #Bra.v3.0_cdna.list[[i]]<-cpm.timecourse.v3.0.log.largeCV.modules[names(Bra.v3.0_cdna)[i],"modules"]
      # input module
Bra.v3.0_cdna.list[[i]]<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID==names(Bra.v3.0_cdna)[i]) %>% dplyr::select(modules) %>% as_vector()
  # iput gene name
    names(Bra.v3.0_cdna.list)[[i]]<-names(Bra.v3.0_cdna)[i]
  }

# clean up Brgo.v3.0_cdna.list
  table(sapply(Bra.v3.0_cdna.list,is.null))
  Bra.v3.0_cdna.list<-Bra.v3.0_cdna.list[!sapply(Bra.v3.0_cdna.list,is.null)]
  table(sapply(Bra.v3.0_cdna.list,is.null))
  
  save(Bra.v3.0_cdna.list,file="../output/Bra.v3.0_cdna.list.Rdata")
 ######### Did not work
# cpm.timecourse.v3.0.log.largeCV.modules %>% nest(transcript_ID) # this is not what I want
# library(purrr)
#cpm.timecourse.v3.0.log.largeCV.modules %>% purrr::transpose() 

ORA analysis of DEGs

# loading module info as custom categories compatible with goseq()
load("../output/Bra.v3.0_cdna.list.Rdata")
# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: XVector
## 
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## Loading required package: Rsamtools
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
## 
## Attaching package: 'GenomicAlignments'
## The following object is masked from 'package:dplyr':
## 
##     last
## 
## Attaching package: 'ShortRead'
## The following object is masked from 'package:dplyr':
## 
##     id
## The following object is masked from 'package:purrr':
## 
##     compose
## The following object is masked from 'package:tibble':
## 
##     view
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
## Attaching package: 'geneLenDataBase'
## The following object is masked from 'package:S4Vectors':
## 
##     unfactor
## 
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
# for ggplot heatmap
## uncompress gz file
system(paste("gunzip -c ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.gz")," > ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")))
## read cDNA fasta file 
Bra.v3.0_cdna<-readDNAStringSet(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
##   A DNAStringSet instance of length 46250
##         width seq                                           names               
##     [1]  1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
##     [2]  1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
##     [3]   957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
##     [4]  1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
##     [5]   774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
##     ...   ... ...
## [46246]   162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247]  1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248]  1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249]   870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250]  1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
## remove fasta file
system(paste("rm ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"),sep=""))
# special funciton for GOseq
GOseq.customcategory.ORA<-function(genelist,padjust=0.05,custom.category.list=Bra.v3.0_cdna.list,Br_cdna=Bra.v3.0_cdna) { # return GO enrichment table, padjus, padjust=0.05. 
  
  bias<-nchar(Br_cdna)
  names(bias)<-names(Br_cdna)
  TF<-(names(bias) %in% genelist)*1
  names(TF)<-names(bias)
  #print(TF)
  pwf<-nullp(TF,bias.data=bias)
  #print(pwf$DEgenes)
  GO.pval <- goseq(pwf,gene2cat=custom.category.list,use_genes_without_cat=TRUE) # format became different in new goseq version (021111). Does not work (042716)
  #GO.pval <- goseq(pwf,gene2cat=Brgo.DF3,use_genes_without_cat=TRUE) # format became different in new goseq version (021111)
  GO.pval$over_represented_padjust<-p.adjust(GO.pval$over_represented_pvalue,method="BH")
  if(GO.pval$over_represented_padjust[1]>padjust) return("no enriched GO")
  else {
    enriched.GO<-GO.pval[GO.pval$over_represented_padjust<padjust,] 
    print("enriched.GO is")
    print(enriched.GO)
    return(enriched.GO)
  }
}
gene.up<-twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(logFC>0&FDR<0.05) %>% dplyr::select(genes) %>% as_vector()
gene.down<-twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(logFC<0&FDR<0.05) %>% dplyr::select(genes) %>% as_vector()

enriched.GO.up<-GOseq.customcategory.ORA(genelist=gene.up) # needs to wait for Bra.v3.0_cdna.list.Rdata ready in Whitney
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...

## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 13 lightyellow            5.053465e-08                1.0000000          7
## 21         tan            1.651936e-04                0.9999827          6
##    numInCat over_represented_padjust
## 13       32             1.162297e-06
## 21       73             1.899727e-03
enriched.GO.down<-GOseq.customcategory.ORA(genelist=gene.down)
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...

## [1] "enriched.GO is"
##      category over_represented_pvalue under_represented_pvalue numDEInCat
## 23     yellow           4.518946e-176                1.0000000        149
## 16       pink            2.553492e-32                1.0000000         40
## 12 lightgreen            1.895763e-05                0.9999984          7
## 17     purple            1.741131e-03                0.9995694          9
##    numInCat over_represented_padjust
## 23      242            1.039358e-174
## 16      128             2.936516e-31
## 12       33             1.453418e-04
## 17      101             1.001150e-02

expression pattern of module/genes of interest (logFC of soil treatment)

n<-1
gene.up.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.up,modules==enriched.GO.up$category[n])
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes=gene.up.category[1:10,]) # works
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found

expression pattern of module/genes of interest (normalized value)

# scaling expression data
cpm.timecourse.v3.0.scale<-t(scale(t(cpm.timecourse.v3.0[,-1]))) %>% as_tibble() %>% bind_cols(data.frame(transcript_ID=cpm.timecourse.v3.0$transcript_ID[]),.)

gene.up.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.up,modules==enriched.GO.up$category[n])
gene.down.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.down,modules==enriched.GO.up$category[n])

expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(data=cpm.timecourse.v3.0.scale,target.genes=gene.up.category[1,]) 
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found

using map-column method???

input<-tribble(
  ~target.genes,~data,~f,
  gene.up.category[1:10,],cpm.timecourse.v3.0.scale,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,
  gene.down.category[1:10,],cpm.timecourse.v3.0.scale,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2
)
input2<-tribble(
  ~f,~param,
  expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,list(target.genes=gene.up.category[1:10,],data=cpm.timecourse.v3.0.scale),
  expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,list(target.genes=gene.down.category[1:10,],data=cpm.timecourse.v3.0.scale)
)

test<-input2 %>% mutate(output=invoke_map(f,param)) # works, but parameters are not visible
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
# how about to use map2?
## an example
params<-tribble(
    ~mean,~sd,~n,
  5,1,1,
  10,5,3,
  -3,10,5
)
params %>% pmap(rnorm) 
## [[1]]
## [1] 6.707234
## 
## [[2]]
## [1] 10.660708 15.642700  3.259823
## 
## [[3]]
## [1]  -8.105668 -14.774122   5.708235  22.881898  10.536788
# 
input3<-tribble(
  ~target.genes,~data,~title,
  gene.up.category[1:10,],cpm.timecourse.v3.0.scale,"2-afternoon soil up",
  gene.down.category[1:10,],cpm.timecourse.v3.0.scale,"2-afternoon soil down",
)
#input3 %>% pmap(expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2) -> expression.pattern
# 
expression.pattern <- input3 %>% mutate(plot=pmap(.,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2))
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
expression.pattern$plot[1] # plot
## Error in eval(expr, envir, enclos): object 'expression.pattern' not found
#input3 %>% mutate(plot=invoke_map(~expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2)) # errors

calculate fold change (trial with six genes) (run only once)

temp.abs<-cpm.timecourse.v3.0.log %>% head() %>%
  gather(sample,value,-transcript_ID) %>%
  mutate(abs.value=2^value) %>%
  inner_join(sample.description.timecourse, by="sample") %>% 
  split(.$soil_trt) 
mean.by.soil<-function(x) {group_by(x, group,transcript_ID) %>% summarize(mean=mean(abs.value))}
temp.abs.mean<-temp.abs %>% map(.,mean.by.soil) 
cpm.timecourse.v3.0.logFC<-tibble(transcript_ID=temp.abs.mean[["SBC_OLD"]]$transcript_ID,group=temp.abs.mean[["SBC_OLD"]]$group,logFC=log(temp.abs.mean[["SBC_OLD"]]$mean/temp.abs.mean[["ATM_BLANK"]]$mean)) %>% left_join(sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time"),by="group")
# check logFC range
range(cpm.timecourse.v3.0.logFC$logFC) #(Jan 31, 2020)

calculate fold change (full genes) (run only once)

temp.abs<-cpm.timecourse.v3.0.log  %>%
  gather(sample,value,-transcript_ID) %>% mutate(abs.value=2^value) %>%
  inner_join(sample.description.timecourse, by="sample") %>% 
  split(.$soil_trt) 
# mean of absolute value funciton
mean.by.soil<-function(x) {group_by(x, group,transcript_ID) %>% summarize(mean=mean(abs.value))}
# calculating absolute value mean
temp.abs.mean<-temp.abs %>% map(.,mean.by.soil) 
# making summary tibble
temp2<-sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time")
sample.description.timecourse.logFC<-temp2[!duplicated(temp2),]
# add sample info
cpm.timecourse.v3.0.logFC<-tibble(transcript_ID=temp.abs.mean[["SBC_OLD"]]$transcript_ID,group=temp.abs.mean[["SBC_OLD"]]$group,logFC=log(temp.abs.mean[["SBC_OLD"]]$mean/temp.abs.mean[["ATM_BLANK"]]$mean)) %>% left_join(sample.description.timecourse.logFC,by="group")
# check
dim(cpm.timecourse.v3.0.logFC)
# check frequency distribution
a<-hist(cpm.timecourse.v3.0.logFC$logFC) # most of them are small
a
# what are genes with super high logFC?
high.FC.genes<-cpm.timecourse.v3.0.logFC %>% filter(abs(logFC)>5) %>% dplyr::select(transcript_ID) 
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = high.FC.genes[1:10,])
#
addAnno2<-function(DGE) {temp<-left_join(DGE,Br.v3.0anno.At.BLAST.highscore,by=c("transcript_ID"="name")) %>%  dplyr::select(transcript_ID,names(DGE),AGI, At_symbol, At_short_description, perc_ID); print(deparse(substitute(DGE)));
write_csv(temp, path=file.path("..","output",paste(deparse(substitute(DGE)),".v3.0anno.csv",sep="")));
return(temp)} 
#
addAnno2(high.FC.genes)
#
dim(cpm.timecourse.v3.0.logFC) #[1] 1110000       5

#write_csv(cpm.timecourse.v3.0.logFC,path="../output/cpm.timecourse.v3.0.logFC.csv") # too large (306 M)
write_csv(cpm.timecourse.v3.0.logFC,path="../output/cpm.timecourse.v3.0.logFC.csv.gz") # 12.3 M

logFC expression pattern (only two afternoon)

cpm.timecourse.v3.0.logFC <-read_csv("../output/cpm.timecourse.v3.0.logFC.csv.gz")
## Parsed with column specification:
## cols(
##   transcript_ID = col_character(),
##   group = col_character(),
##   logFC = col_double(),
##   sampling_day = col_character(),
##   sampling_time = col_character()
## )
target.genes<-gene.up
# expression.pattern.Br.graph.timecourse.v3.0annotation.logFC<-function(data=cpm.timecourse.v3.0.logFC,target.genes,title="",subset.data="only_two_afternoon"){
# #print(paste("data is",data[1:10,]))
# #print(paste("tissue.type is root"))
# data[is.na(data)] <- 0 #
# data.temp<-data  %>% dplyr::filter(transcript_ID %in% target.genes) 
# 
# # if (2-afternoon=TRUE)
# if (subset.data=="only_two_afternoon") {
# p<-data.temp %>% ggplot(aes(x=sampling_day,y=logFC))  + 
#   geom_boxplot(alpha = 0.5)  + 
#   theme_bw() +
#   theme(strip.text.y=element_text(angle=0),axis.text.x=element_text(angle=90)) +
#   theme(legend.position="bottom") + labs(title=title)
# p
# } else {print("Define subset.data other than only_two_afternoon.")}
# }
# test the function
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")

J meeting (Jan 28, 2020)

K-means clustering

# 2_afternoon DEG expression data (scaled)
cpm.timecourse.v3.0.scale.twoafternoon.DEG<-cpm.timecourse.v3.0.scale %>% dplyr::select(-cv) %>% 
  inner_join(twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dplyr::select(genes),by=c(transcript_ID="genes")) %>%
gather(sample,value,-1) %>% inner_join(sample.description.timecourse,by="sample") %>% filter(sampling_time=="2_afternoon")
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread
cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread<-cpm.timecourse.v3.0.scale.twoafternoon.DEG %>% dplyr::select(transcript_ID,sample,value) %>% spread(sample,value,-1)
dim(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread) # [1] 1442  97
## [1] 1442   97
# calculate wss
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1],2,var))
for (i in 2:20) wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1],
                                     centers=i,iter.max = 10)$withinss) # If default iter.max=10 gave me "did not converge in 10 iterations" error. Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")

Let’s perform the actual clsutering using K=8:

set.seed(20)
kClust.8 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.8 <- kClust.8$cluster
# number of clusters
cluster.8.num<-tibble(cluster=kClusters.8) %>% group_by(cluster) %>% summarize(n=sum(cluster)) 
cluster.8.num$cluster<-as.character(cluster.8.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i

clust.centroid = function(i, dat, clusters) {
  ind = (clusters == i)
  colMeans(dat[ind,])
}
kClustcentroids.8 <- sapply(levels(factor(kClusters.8)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], kClusters.8)

Plotting the centroids to see how they behave: tidyverse version

library(glue)
## 
## Attaching package: 'glue'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     trim
## The following object is masked from 'package:GenomicRanges':
## 
##     trim
## The following object is masked from 'package:Biostrings':
## 
##     collapse
## The following objects are masked from 'package:IRanges':
## 
##     collapse, trim
## The following object is masked from 'package:dplyr':
## 
##     collapse
# adding sample description to data
  data.sample<-kClustcentroids.8 %>% as_tibble(rownames="sample") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse,by="sample") %>% 
  inner_join(cluster.8.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
  data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>%  inner_join(
    data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>%  dplyr::slice(rep(1:768)[!duplicated(.$group.cluster)]) 
# plot
  p8<-  ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") + 
  geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
  facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="scaled expression level") 
p8

ggsave(p8,file="../output/Twoafternoon.DEG.Kmean.8clusters.png",width=11,height=8)

Let’s perform the actual clsutering using K=5:

set.seed(20)
kClust.5 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], centers=5, nstart = 1000, iter.max = 20)
kClusters.5 <- kClust.5$cluster
# number of clusters
cluster.5.num<-tibble(cluster=kClusters.5) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.5.num$cluster<-as.character(cluster.5.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids:

kClustcentroids.5 <- sapply(levels(factor(kClusters.5)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], kClusters.5)

Plotting the centroids to see how they behave: tidyverse version

# adding sample description to data
  data.sample<-kClustcentroids.5 %>% as_tibble(rownames="sample") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse,by="sample") %>% 
  inner_join(cluster.5.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
  data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>%  inner_join(
    data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>%  dplyr::slice(rep(1:480)[!duplicated(.$group.cluster)]) # only cluster 1... why???
# plot
  p5<-  ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") + 
  geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
  facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",color = "Cluster",y="scaled expression level") 
p5

ggsave(p5,file="../output/Twoafternoon.DEG.Kmean.5clusters.png",width=11,height=8)

logFC K-means

expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")

# 2_afternoon DEG expression data (scaled)
cpm.timecourse.v3.0.logFC.twoafternoon.DEG<-cpm.timecourse.v3.0.logFC %>% 
  inner_join(twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dplyr::select(genes),by=c(transcript_ID="genes")) %>% filter(sampling_time=="2_afternoon")
# spread
cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread<-cpm.timecourse.v3.0.logFC.twoafternoon.DEG %>% dplyr::select(transcript_ID,group,logFC) %>% spread(group,logFC,-1)
dim(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread) # [1] 1474  97
## [1] 1442    9
# calculate wss
wss.logFC <- (nrow(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1],2,var))
for (i in 2:20) wss.logFC[i] <- sum(kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1],
                                     centers=i,iter.max = 10)$withinss,iter.max=20) # If default iter.max=10 gave me "did not converge in 10 iterations" error. Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
## Warning: did not converge in 10 iterations
plot(1:20, wss.logFC, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")

# Let’s perform the actual clsutering using K=5:

set.seed(20)
kClust.logFC.5 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], centers=5, nstart = 1000, iter.max = 20)
kClusters.logFC.5 <- kClust.logFC.5$cluster
# number of clusters
cluster.5.num<-tibble(cluster=kClusters.logFC.5) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.5.num$cluster<-as.character(cluster.5.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids:

kClustcentroids.logFC.5 <- sapply(levels(factor(kClusters.logFC.5)), clust.centroid, cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], kClusters.logFC.5)

Plotting the centroids to see how they behave: tidyverse version

# making sample.description.timecourse.logFC
temp2<-sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time")
sample.description.timecourse.logFC<-temp2[!duplicated(temp2),]
# plot
p.logFC.5<-kClustcentroids.logFC.5 %>% as_tibble(rownames="group") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse.logFC,by="group") %>% 
  inner_join(cluster.5.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) ) %>%
  ggplot(aes(x=sampling_day,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") +
  facet_grid(cluster.n~.) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",color = "Cluster",y="scaled expression level") 
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.5

ggsave(p.logFC.5,file="../output/Twoafternoon.DEG.logFC.Kmean.5clusters.png",width=11,height=8)

Let’s perform the actual clsutering using K=8:

set.seed(20)
kClust.logFC.8 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.logFC.8 <- kClust.logFC.8$cluster
# number of clusters
cluster.8.num<-tibble(cluster=kClusters.logFC.8) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.8.num$cluster<-as.character(cluster.8.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids:

kClustcentroids.logFC.8 <- sapply(levels(factor(kClusters.logFC.8)), clust.centroid, cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], kClusters.logFC.8)

Plotting the centroids to see how they behave: tidyverse version

p.logFC.8<-kClustcentroids.logFC.8 %>% as_tibble(rownames="group") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse.logFC,by="group") %>% 
  inner_join(cluster.8.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) ) %>%
  ggplot(aes(x=sampling_day,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") +
  facet_grid(cluster.n~.) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="scaled expression level") 
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.8

ggsave(p.logFC.8,file="../output/Twoafternoon.DEG.logFC.Kmean.8clusters.png",width=11,height=8)

conclusion

load Brgo.v3.0anno.Atgoslim.BP.list

load(file.path("..","Annotation_copy","output","v3.0annotation","Brgo.v3.0anno.Atgoslim.BP.list.Rdata"))

load GO.ORA function

GOseq function for Brassica rapa (v3.0)

# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
# for ggplot heatmap
## uncompress gz file
system(paste("gunzip -c ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.gz")," > ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")))
## read cDNA fasta file 
Bra.v3.0_cdna<-readDNAStringSet(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
##   A DNAStringSet instance of length 46250
##         width seq                                           names               
##     [1]  1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
##     [2]  1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
##     [3]   957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
##     [4]  1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
##     [5]   774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
##     ...   ... ...
## [46246]   162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247]  1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248]  1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249]   870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250]  1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
## remove fasta file
system(paste("rm ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"),sep=""))
# GOseq function
GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA<-function(genelist,padjust=0.05,ontology="BP",custom.category.list=Brgo.v3.0anno.Atgoslim.BP.list,Br_cdna=Bra.v3.0_cdna) { # return GO enrichment table, padjus, padjust=0.05. 
  
  bias<-nchar(Br_cdna)
  names(bias)<-names(Br_cdna)
  TF<-(names(bias) %in% genelist)*1
  names(TF)<-names(bias)
  #print(TF)
  pwf<-nullp(TF,bias.data=bias)
  #print(pwf$DEgenes)
  GO.pval <- goseq(pwf,gene2cat=custom.category.list,use_genes_without_cat=TRUE) # format became different in new goseq version (021111). Does not work (042716)
  #GO.pval <- goseq(pwf,gene2cat=Brgo.DF3,use_genes_without_cat=TRUE) # format became different in new goseq version (021111)
  
  #head(GO.pval) 
  if(ontology=="BP") {
    GO.pval2<-subset(GO.pval,ontology=="BP")
  } else if(ontology=="CC") {
    GO.pval2<-subset(GO.pval,ontology=="CC")
  } else {
    GO.pval2<-subset(GO.pval,ontology=="MF")
  }
    
  GO.pval2$over_represented_padjust<-p.adjust(GO.pval2$over_represented_pvalue,method="BH")
  if(GO.pval2$over_represented_padjust[1]>padjust) return("no enriched GO")
  else {
    enriched.GO<-GO.pval2[GO.pval2$over_represented_padjust<padjust,] 
    print("enriched.GO is")
    print(enriched.GO)
    
    ## write Term and Definition 
    for(i in 1:dim(enriched.GO)[1]) {
      if(is.null(Term(GOTERM[enriched.GO[i,"category"]]))) {next} else {
      enriched.GO$Term[i]<-Term(GOTERM[[enriched.GO[i,"category"]]])
      enriched.GO$Definition[i]<-Definition(GOTERM[[enriched.GO[i,"category"]]])
      }
    }
    return(enriched.GO)
  }
}
#
head(Bra.v3.0_cdna)
##   A DNAStringSet instance of length 6
##     width seq                                               names               
## [1]  1254 ATGCGACCACCGGGTGTTGTTTC...CTGAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2]  1668 ATGCCAGCAATGCATGCCGTTTT...GTAGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3]   957 ATGATGCTTCTCGTTCATACCCG...GGAACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4]  1299 ATGAGTCGTCTTCTCCTTGCTCA...GTGGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5]   774 ATGGATTCTGGGCTTCAGCATCT...AAGGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## [6]  3327 ATGGCGTCCACTCCTCCTCAAAA...GCGGTGGGTTTCAATTTCCTTGA BraA01g000060.3C
# length(bias) # 44239 > 45019 where the bias come from?
#  bias.data vector must have the same length as DEgenes vector!

GO ORA of each cluster

temp<-tibble(transcript_ID=cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread$transcript_ID, cluster=kClusters.8) %>% split(.$cluster) %>% map(function(x) {GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist=x$transcript_ID)})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            1.732470e-08                1.0000000         18
## 423  GO:0006468            1.855801e-08                1.0000000         29
## 2750 GO:0050832            1.926103e-08                1.0000000         14
## 1858 GO:0031348            2.494123e-05                0.9999987          5
## 638  GO:0006952            3.373325e-05                0.9999904         18
## 890  GO:0009611            4.127141e-05                0.9999930         10
## 3442 GO:1900067            5.827127e-05                0.9999999          2
## 1000 GO:0009814            6.360706e-05                0.9999976          4
## 921  GO:0009651            6.434943e-05                0.9999832         15
## 3242 GO:0080119            1.300857e-04                0.9999973          3
##      numInCat                                           term ontology
## 2253      726                  defense response to bacterium       BP
## 423      1484                        protein phosphorylation       BP
## 2750      469                     defense response to fungus       BP
## 1858       64        negative regulation of defense response       BP
## 638      1165                               defense response       BP
## 890       419                           response to wounding       BP
## 3442        3 regulation of cellular response to alkaline pH       BP
## 1000       49     defense response, incompatible interaction       BP
## 921      1045                        response to salt stress       BP
## 3242       17                           ER body organization       BP
##      over_represented_padjust
## 2253             2.432668e-05
## 423              2.432668e-05
## 2750             2.432668e-05
## 1858             2.362558e-02
## 638              2.556305e-02
## 890              2.606289e-02
## 3442             2.709111e-02
## 1000             2.709111e-02
## 921              2.709111e-02
## 3242             4.928947e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 390 GO:0006412             3.80768e-07                        1         11
##     numInCat        term ontology over_represented_padjust
## 390      715 translation       BP               0.00144273
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            3.409045e-11                        1          7
## 2259 GO:0042761            1.726215e-08                        1          5
## 1181 GO:0010143            1.931256e-06                        1          4
##      numInCat                                            term ontology
## 1291       41                    suberin biosynthetic process       BP
## 2259       30 very long-chain fatty acid biosynthetic process       BP
## 1181       29                      cutin biosynthetic process       BP
##      over_represented_padjust
## 1291             1.291687e-07
## 2259             3.270314e-05
## 1181             2.439177e-03
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200            3.659965e-11                1.0000000         17
## 2253 GO:0042742            2.432098e-08                1.0000000         23
## 1174 GO:0010120            7.293180e-08                1.0000000          6
## 1166 GO:0010112            6.780014e-07                1.0000000          5
## 638  GO:0006952            8.277917e-07                0.9999998         27
## 899  GO:0009626            2.954187e-06                0.9999997          9
## 894  GO:0009617            3.846114e-06                0.9999994         11
## 890  GO:0009611            6.457569e-06                0.9999987         14
## 855  GO:0009409            1.517719e-05                0.9999961         17
## 944  GO:0009697            2.834944e-05                0.9999992          4
## 2750 GO:0050832            3.456585e-05                0.9999924         13
## 1205 GO:0010193            5.358930e-05                0.9999968          5
## 960  GO:0009737            7.818971e-05                0.9999759         18
## 187  GO:0002237            8.857740e-05                0.9999917          6
## 185  GO:0002229            1.515927e-04                0.9999802          7
##      numInCat                                       term ontology
## 1210      286                         response to chitin       BP
## 2253      726              defense response to bacterium       BP
## 1174       27             camalexin biosynthetic process       BP
## 1166       22 regulation of systemic acquired resistance       BP
## 638      1165                           defense response       BP
## 899       140         plant-type hypersensitive response       BP
## 894       241                      response to bacterium       BP
## 890       419                       response to wounding       BP
## 855       696                           response to cold       BP
## 944        21        salicylic acid biosynthetic process       BP
## 2750      469                 defense response to fungus       BP
## 1205       54                          response to ozone       BP
## 960       832                  response to abscisic acid       BP
## 187        88   response to molecule of bacterial origin       BP
## 185       126              defense response to oomycetes       BP
##      over_represented_padjust
## 1210             1.386761e-07
## 2253             4.607609e-05
## 1174             9.211286e-05
## 1166             6.273006e-04
## 638              6.273006e-04
## 899              1.865569e-03
## 894              2.081846e-03
## 890              3.058466e-03
## 855              6.389597e-03
## 944              1.074160e-02
## 2750             1.190636e-02
## 1205             1.692082e-02
## 960              2.278929e-02
## 187              2.397284e-02
## 185              3.829231e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732            8.264952e-07                1.0000000          5
## 3114 GO:0071369            1.490655e-06                1.0000000          5
## 3094 GO:0071281            7.588090e-06                0.9999997          5
## 254  GO:0006096            9.647819e-06                0.9999994          6
##      numInCat                                   term ontology
## 3164       52      cellular response to nitric oxide       BP
## 3114       62 cellular response to ethylene stimulus       BP
## 3094       77          cellular response to iron ion       BP
## 254       138                     glycolytic process       BP
##      over_represented_padjust
## 3164              0.002824047
## 3114              0.002824047
## 3094              0.009138897
## 254               0.009138897
# convert list to data.frame
temp %>% enframe(name="cluster") %>% unnest(value) %>% write_csv(path="../output/twoafternoon.trtsoil.DEG.Kmeans.cluster.csv")

any trt DEGs (two afternoon) clustering and cluster ORAs

K-means clustering

# 2_afternoon DEG expression data (scaled)
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG<-cpm.timecourse.v3.0.scale %>% dplyr::select(-cv) %>% 
  inner_join(twoafternoon.any.trtlive.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dplyr::select(genes),by=c(transcript_ID="genes")) %>%
gather(sample,value,-1) %>% inner_join(sample.description.timecourse,by="sample") %>% filter(sampling_time=="2_afternoon")
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread<-cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG %>% dplyr::select(transcript_ID,sample,value) %>% spread(sample,value,-1)
dim(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread) # [1] 2178   97
## [1] 2178   97
# calculate wss
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1],2,var))
for (i in 2:20) wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1],
                                     centers=i,iter.max = 10)$withinss) # If default iter.max=10 gave me "did not converge in 10 iterations" error. Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")

Let’s perform the actual clsutering using K=8:

set.seed(20)
kClust.any.trtlive.8 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.any.trtlive.8 <- kClust.any.trtlive.8$cluster
# number of clusters
cluster.any.trtlive.8.num<-tibble(cluster=kClusters.any.trtlive.8) %>% group_by(cluster) %>% summarize(n=n()) 
cluster.any.trtlive.8.num$cluster<-as.character(cluster.any.trtlive.8.num$cluster) # classic way
cluster.any.trtlive.8.num

Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i

clust.centroid = function(i, dat, clusters) {
  ind = (clusters == i)
  colMeans(dat[ind,])
}

kClustcentroids.any.trtlive.8 <- sapply(levels(factor(kClusters.any.trtlive.8)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], kClusters.any.trtlive.8)
kClustcentroids.any.trtlive.8
##                                     1           2            3           4
## 1a1_q_002_S1_R1_001       0.667743321 -0.32765717 -0.046643192 -0.39677662
## 1a3_q_004_S3_R1_001      -0.164654334 -0.35598558 -0.231303948 -0.64957240
## 1a7_q_007_d8_S7_R1_001   -0.099544549  0.07985282 -0.050667391 -0.23201923
## 1a8_q_008_d8_S8_R1_001    0.297267619 -0.41141910 -0.162422020 -0.61640584
## 1c6_q_028_S22_R1_001      0.056987377 -0.44714663 -0.126738544 -0.46592398
## 1d3_q_038_S27_R1_001     -0.064943110 -0.01126279 -0.135680172 -0.46607100
## 1d5_q_042_S29_R1_001      0.200238193 -0.29080793 -0.118800923 -0.38894467
## 1e4_q_055_S36_R1_001      0.003589494 -0.29876284 -0.122716897 -0.42267062
## 1e5_q_056_S37_R1_001      0.726417562  1.43425878 -0.021857251  1.25689847
## 1e6_q_057_S38_R1_001      0.995977726  0.26252132 -0.001403176  0.65860678
## 1e8_q_059_S40_R1_001      0.112858377 -0.18738002 -0.167528572 -0.33182326
## 1f1_q_060_S41_R1_001      0.580798020 -0.44206099 -0.119842108 -0.48519518
## 1f2_q_062_S42_R1_001      1.390453319 -0.50086543 -0.106813122 -0.61309121
## 1f3_q_065_S43_R1_001      0.309852659 -0.35553902 -0.164473983 -0.70604376
## 1f4_q_066_S44_R1_001     -0.275865455  0.84091426 -0.041472560  0.10389319
## 1f5_q_068_S45_R1_001      0.218063039  1.13773485 -0.035367142  0.76190021
## 1g3_q_075_S51_R1_001      0.170652760 -0.49476679 -0.254659555 -0.86152704
## 1g7_q_082_S55_R1_001      0.989460365  0.04695988  0.067401377  0.36856141
## 1h3_q_086_d8_S59_R1_001  -0.049775972 -0.38980204 -0.163234526 -0.72735124
## 1h4_q_090_d8_S60_R1_001   0.177758325 -0.26796944 -0.192104972 -0.59821603
## 1i2_q_104_S66_R1_001      0.448865079  1.41475065 -0.031466952  1.20248943
## 1i3_q_105_S67_R1_001     -0.051509530 -0.53810160 -0.187382050 -0.85025716
## 1i7_q_110_S71_R1_001      0.068749205 -0.49607172 -0.197040461 -0.60838606
## 1j3_q_114_S75_R1_001      0.403202781 -0.32776759 -0.142145451 -0.38805290
## 1j4_q_115_S76_R1_001      0.432092698 -0.40115361 -0.128720389 -0.68790645
## 1j6_q_117_S78_R1_001      1.446221301 -0.35008313 -0.174803857 -0.35626757
## 1j7_q_118_S79_R1_001     -0.028721586 -0.09942786 -0.121350305 -0.45278572
## 1k2_q_121_S82_R1_001      0.156892451 -0.21364902 -0.163163385 -0.16031310
## 1k4_q_127_S84_R1_001     -0.124611222  0.15915694 -0.027022813 -0.17581703
## 1k5_q_128_S85_R1_001     -0.042266884 -0.32432645 -0.124890153 -0.43393167
## 1k6_q_130_S86_R1_001     -0.205451877 -0.26762999 -0.135008473 -0.73030065
## 1k8_q_135_S88_R1_001      1.438110763 -0.57711139 -0.131422307 -0.60401603
## 2a1_q_146_S97_R1_001     -0.290204703  1.50729788  0.033306735  0.66315456
## 2a7_q_153_S103_R1_001     0.121751075 -0.14219194 -0.161296532 -0.22383676
## 2b1_q_156_S105_R1_001    -0.370461684  0.21563344 -0.079958116 -0.38952862
## 2b2_q_158_S106_R1_001    -0.092906871 -0.33983305 -0.129935262 -0.61728673
## 2b3_q_160_S107_R1_001     0.801826670 -0.07853962 -0.063934905 -0.32388388
## 2b7_q_165_S111_R1_001     1.941264814  0.04399119  0.014610245  0.47847149
## 2b8_q_167_S112_R1_001    -0.439497792  0.31449571 -0.124601004 -0.31894277
## 2c1_q_168_S113_R1_001    -0.332622316 -0.37936089 -0.271802644 -0.96239754
## 2c3_q_171_S115_R1_001     0.747974239  0.26976923 -0.147044784  0.83162456
## 2c4_q_172_d8_S116_R1_001 -0.177353848 -0.42401180 -0.269084210 -0.92547133
## 2c5_q_173_S117_R1_001    -0.153468985 -0.21201987 -0.128152266 -0.56160077
## 2c7_q_178_S119_R1_001     0.490658837 -0.13476241 -0.113390560 -0.51540415
## 2d2_q_181_S122_R1_001    -0.038406305 -0.34746080 -0.226491607 -0.92326944
## 2d7_q_189_S127_R1_001     0.076776506 -0.38918324 -0.195755268 -0.59211153
## 2e6_q_200_d8_S134_R1_001  0.148508712 -0.47631926 -0.196953155 -0.93980129
## 2f6_q_212_S142_R1_001    -0.686284077  0.74811782 -0.055292537 -0.18810402
## 2g1_q_217_S145_R1_001     0.093629703  0.92356181 -0.081688975  0.70480619
## 2g4_q_221_S148_R1_001     0.514267158 -0.05354395 -0.122223428  0.13477216
## 2g8_q_229_S152_R1_001    -0.085726859 -0.58175229 -0.241013995 -1.00232529
## 2h1_q_230_S153_R1_001     0.594540848  1.42283734  0.074523033  0.92677550
## 2h4_q_235_S156_R1_001     0.047703914 -0.54772186 -0.230598200 -0.82989457
## 2h7_q_238_S159_R1_001    -0.086813792 -0.36230319 -0.150059576 -0.56791485
## 2i1_q_242_S161_R1_001     1.824516917 -0.50240460 -0.161926754 -0.15096296
## 2i4_q_246_S164_R1_001     0.086726430 -0.27646481 -0.114508669 -0.63570881
## 2i5_q_247_S165_R1_001     0.687983021 -0.48426977 -0.165921004 -0.63404337
## 2i6_q_247_d8_S166_R1_001 -0.005610120 -0.40071256 -0.160537632 -0.75763689
## 2j1_q_250_S169_R1_001    -0.163029116  0.41207603 -0.073569058 -0.05144553
## 2j4_q_254_S172_R1_001     1.939829155 -0.46710567 -0.123599693 -0.16694577
## 2j6_q_257_S174_R1_001     0.339197640 -0.30685656 -0.202345914 -0.84711496
## 2j7_q_259_d8_S175_R1_001 -0.170478161  0.11700872 -0.059573400 -0.25999939
## 2k4_q_267_S180_R1_001    -0.043621197 -0.03910844 -0.095527983 -0.20542526
## 2l4_q_282_S188_R1_001    -0.508317144  1.57909885 -0.015517867  0.29050305
## 3a1_q_289_S193_R1_001     0.360316641 -0.09362945 -0.183379817  0.09466893
## 3a4_q_294_S196_R1_001     1.654676569 -0.38124590 -0.096435809 -0.33989249
## 3b5_q_305_S205_R1_001    -0.399262440  0.12774387 -0.163787765 -0.49305472
## 3c2_q_312_S210_R1_001     0.304620799 -0.42498325 -0.179310235 -0.44503651
## 3c5_q_317_S213_R1_001     1.890696314  0.13603595 -0.032988050  0.50388257
## 3c7_q_322_S215_R1_001     0.379544777  1.01519691 -0.092914085  1.08195234
## 3d2_q_325_S218_R1_001    -0.082659502 -0.52454544 -0.199749838 -0.92002753
## 3d6_q_333_S222_R1_001    -0.068865617 -0.45913790 -0.206269355 -0.67135111
## 3e2_q_342_S226_R1_001     0.137660317 -0.01778525 -0.125044996 -0.21837645
## 3e3_q_343_S227_R1_001     0.228599212  0.85867987 -0.023059172  0.46679634
## 3e5_q_348_S229_R1_001     0.110151868 -0.39202581 -0.168827285 -0.47746082
## 3f2_q_353_d8_S234_R1_001  0.580631531 -0.31787248 -0.131266972 -0.33323815
## 3f4_q_356_S236_R1_001     0.241684607 -0.38426428 -0.174436203 -0.83738845
## 3f5_q_358_S237_R1_001     0.055736388 -0.48907374 -0.161154767 -0.69096928
## 3f7_q_360_S239_R1_001     1.240685536 -0.51115748 -0.166456941 -0.37523025
## 3f8_q_360_d8_S240_R1_001  0.206508248 -0.54884151 -0.231965593 -1.04556939
## 3h1_q_372_S249_R1_001    -0.321931940 -0.20062876 -0.145521677 -0.49078705
## 3h2_q_374_S250_R1_001    -0.077989616 -0.40957242 -0.199900886 -1.00888379
## 3h3_q_375_d8_S251_R1_001  0.023918597 -0.50825597 -0.252555408 -1.06700296
## 3h5_q_377_S253_R1_001     0.206624602  0.93161082 -0.113117780  0.95781878
## 3h8_q_387_d8_S256_R1_001 -0.520567487  0.57433447 -0.073081784 -0.17028803
## 3i3_q_391_S259_R1_001     0.466877113  1.21686116  0.002709982  0.97244657
## 3i4_q_392_S260_R1_001    -0.437682578  0.21600207 -0.092443264 -0.39239927
## 3j4_q_403_S268_R1_001     0.127935282 -0.31015358 -0.113554006 -0.61933737
## 3k1_q_411_S273_R1_001    -0.531958895  1.74341114  0.057241491  0.43232569
## 3k6_q_417_S278_R1_001     0.096255676 -0.54533667 -0.194263773 -0.88040526
## 3k7_q_419_S279_R1_001     1.410969598 -0.50565904 -0.096625964 -0.36010574
## 3k8_q_420_S280_R1_001    -0.147982771 -0.32632497 -0.125488478 -0.51739957
## 3l1_q_421_S281_R1_001    -0.378719653 -0.07379489 -0.231018739 -0.45002638
## 3l4_q_426_S284_R1_001     0.885380743 -0.33277685 -0.115217251 -0.41233264
## 3l6_q_428_S286_R1_001    -0.414310189 -0.45589789 -0.177417551 -0.90216484
## 3l8_q_432_S288_R1_001    -0.436179690 -0.47260761 -0.209829063 -0.87984537
##                                     5            6            7            8
## 1a1_q_002_S1_R1_001      -0.153958174 -0.924921160 -0.354781005 -0.584426385
## 1a3_q_004_S3_R1_001      -0.274268070 -0.447217126  0.589512528  0.598921297
## 1a7_q_007_d8_S7_R1_001   -0.018767203 -0.383198263  0.723989576  0.051023263
## 1a8_q_008_d8_S8_R1_001   -0.283301202 -0.447288310  0.556428670 -0.032406247
## 1c6_q_028_S22_R1_001     -0.116324182  0.359480786 -0.040932142  0.001411183
## 1d3_q_038_S27_R1_001     -0.018124208 -0.272091794  0.518926045 -0.120575363
## 1d5_q_042_S29_R1_001     -0.004841077  0.222942498  0.094901649  0.322552795
## 1e4_q_055_S36_R1_001     -0.023384604  0.686235337  0.304362191  0.201468554
## 1e5_q_056_S37_R1_001      3.001108103  2.794726489 -0.609160267  1.254886850
## 1e6_q_057_S38_R1_001      1.278834477  0.515986324 -0.565823895  0.673239009
## 1e8_q_059_S40_R1_001      0.042664972  0.148954386  0.371113939  0.840546796
## 1f1_q_060_S41_R1_001     -0.073907423  0.198346493 -0.163200857  0.225007773
## 1f2_q_062_S42_R1_001      0.279644263 -0.773724225 -0.779136809 -0.556446579
## 1f3_q_065_S43_R1_001     -0.138578664 -0.148853275  0.222174334 -0.252479954
## 1f4_q_066_S44_R1_001      0.742262686 -0.087123478 -0.093545206 -0.128993798
## 1f5_q_068_S45_R1_001      1.598916046  0.840402298 -0.225206345  0.470472125
## 1g3_q_075_S51_R1_001     -0.463685522 -0.847881944  0.540982295 -0.182646702
## 1g7_q_082_S55_R1_001      0.752992714 -0.774934794 -0.451953573 -0.112733338
## 1h3_q_086_d8_S59_R1_001  -0.501378182 -0.510109918  0.657516090 -0.328068192
## 1h4_q_090_d8_S60_R1_001  -0.121186940 -0.562001832  0.262405135 -0.132406627
## 1i2_q_104_S66_R1_001      2.808293432  1.329847936 -0.724093805  1.210904204
## 1i3_q_105_S67_R1_001     -0.423748128 -0.226203882  0.168668688 -0.076734949
## 1i7_q_110_S71_R1_001     -0.296350739 -0.238351823 -0.007081755  0.343755903
## 1j3_q_114_S75_R1_001     -0.021813427 -0.243916463  0.229504937  0.075125509
## 1j4_q_115_S76_R1_001     -0.196616237 -0.890961600 -0.196397041 -0.659117803
## 1j6_q_117_S78_R1_001      0.549444253 -0.325991782 -0.734857987 -0.115204243
## 1j7_q_118_S79_R1_001      0.037219558 -0.260465150  0.534407772 -0.053151031
## 1k2_q_121_S82_R1_001      0.199362890 -0.364045001  0.301727533  1.351445206
## 1k4_q_127_S84_R1_001      0.173924348 -0.406927751  0.072079474 -0.473781730
## 1k5_q_128_S85_R1_001     -0.215753164 -0.072101111  0.259760791 -0.062410241
## 1k6_q_130_S86_R1_001     -0.460316168 -0.400289996  0.389949688 -0.616419181
## 1k8_q_135_S88_R1_001      0.018683019 -0.904276921 -0.969458968 -0.723832177
## 2a1_q_146_S97_R1_001      1.548552328  1.536690333 -0.178393303  0.054346438
## 2a7_q_153_S103_R1_001     0.147745237 -0.438582913  0.683578621  0.459811399
## 2b1_q_156_S105_R1_001     0.020198161  0.618078195  0.182930022  0.944217317
## 2b2_q_158_S106_R1_001    -0.306414885  1.526392867  0.082234743 -0.018226581
## 2b3_q_160_S107_R1_001     0.390901598 -0.813561405 -0.565222051 -0.706699354
## 2b7_q_165_S111_R1_001     1.248019890  0.126337281 -0.934373530 -0.121122876
## 2b8_q_167_S112_R1_001     0.091921143 -0.156786025  0.969397006 -0.004710857
## 2c1_q_168_S113_R1_001    -0.446945531 -0.166032194  0.545028379 -0.348697158
## 2c3_q_171_S115_R1_001     1.583656713 -0.525062113 -0.639846863  0.753543137
## 2c4_q_172_d8_S116_R1_001 -0.355533240 -0.399971858  0.284564945 -0.227382478
## 2c5_q_173_S117_R1_001    -0.259914785  0.553831260  0.601120414 -0.148442431
## 2c7_q_178_S119_R1_001     0.135140823 -0.274655475 -0.441467600 -0.335061033
## 2d2_q_181_S122_R1_001    -0.412616594 -0.247284763  0.424106842 -0.555798480
## 2d7_q_189_S127_R1_001    -0.120166308 -0.007066215  0.501009353  0.597616838
## 2e6_q_200_d8_S134_R1_001 -0.573391211 -0.183495507  0.360228667 -0.428652845
## 2f6_q_212_S142_R1_001     0.430736702 -0.091817910  0.021102376 -0.352773200
## 2g1_q_217_S145_R1_001     1.474721140 -0.025911511 -0.341830399  0.304520478
## 2g4_q_221_S148_R1_001     0.647741677  0.176186584  0.036993965  1.910217724
## 2g8_q_229_S152_R1_001    -0.621887877 -0.463208823  0.194081547 -0.284880156
## 2h1_q_230_S153_R1_001     2.039957165  0.397077126 -0.589539442 -0.006630842
## 2h4_q_235_S156_R1_001    -0.504348358 -0.848503900  0.255414392 -0.148846897
## 2h7_q_238_S159_R1_001    -0.326914006  0.376117636  0.402644254 -0.140112370
## 2i1_q_242_S161_R1_001     0.434038292 -0.674249074 -0.747811958  0.007318162
## 2i4_q_246_S164_R1_001    -0.125010568 -0.278151201  0.240550082 -0.415217148
## 2i5_q_247_S165_R1_001    -0.381462914 -0.943061008 -0.352788461 -0.743383002
## 2i6_q_247_d8_S166_R1_001 -0.464964224 -0.634031044  0.223624265 -0.544913192
## 2j1_q_250_S169_R1_001     0.325284292 -0.169819425  0.681030330 -0.036504180
## 2j4_q_254_S172_R1_001     0.476705107 -0.847424041 -0.743644544  0.114926792
## 2j6_q_257_S174_R1_001    -0.218644633 -0.038083229  0.496308158 -0.431267644
## 2j7_q_259_d8_S175_R1_001  0.003665751 -0.030250939  0.549140225 -0.006946679
## 2k4_q_267_S180_R1_001     0.066252816  0.053435080  0.480857646  0.182634435
## 2l4_q_282_S188_R1_001     1.030555163  0.550425037  0.226764174  0.026764177
## 3a1_q_289_S193_R1_001     0.590364587 -0.417740150  0.262911604  2.570998452
## 3a4_q_294_S196_R1_001     0.366466737 -0.745802208 -0.666366936 -0.369921572
## 3b5_q_305_S205_R1_001     0.009970362 -0.112518855  0.814923500 -0.195784932
## 3c2_q_312_S210_R1_001     0.025100602  0.067827568  0.030897585  0.307255124
## 3c5_q_317_S213_R1_001     1.285242150 -0.434157916 -0.779605134  1.658738552
## 3c7_q_322_S215_R1_001     1.980884807  0.518454435 -0.276534431  1.163743299
## 3d2_q_325_S218_R1_001    -0.486130552 -0.083259435  0.300327702 -0.320047010
## 3d6_q_333_S222_R1_001    -0.275737319  0.380188979  0.232464582  0.853361004
## 3e2_q_342_S226_R1_001     0.221647875 -0.475727232  0.048413963 -0.132021913
## 3e3_q_343_S227_R1_001     1.313905172  0.048997273 -0.084524050  0.032460640
## 3e5_q_348_S229_R1_001    -0.136737036  0.025293329 -0.038963906  0.844919361
## 3f2_q_353_d8_S234_R1_001 -0.009009084 -0.424745025  0.396295713 -0.086210712
## 3f4_q_356_S236_R1_001    -0.296676268 -0.417604347  0.459312608 -0.665399869
## 3f5_q_358_S237_R1_001    -0.367387419  0.019020122  0.140396871 -0.282452074
## 3f7_q_360_S239_R1_001     0.232677999 -0.708896832 -0.788506091 -0.288040976
## 3f8_q_360_d8_S240_R1_001 -0.502458950 -0.045472372 -0.131730152 -0.433810292
## 3h1_q_372_S249_R1_001    -0.014092527  0.810541589  0.153714650  0.171429882
## 3h2_q_374_S250_R1_001    -0.420333546 -0.157066159  0.481945469 -0.500680953
## 3h3_q_375_d8_S251_R1_001 -0.626610929 -0.560543898  0.597881359 -0.532116758
## 3h5_q_377_S253_R1_001     1.897990604  0.558950444 -0.326640266  0.643963226
## 3h8_q_387_d8_S256_R1_001  0.144839064 -0.064330227  0.749218292 -0.086378266
## 3i3_q_391_S259_R1_001     2.298543000  0.285996895 -0.568169910  0.008373999
## 3i4_q_392_S260_R1_001     0.323715774 -0.242494345  0.459354969  0.147114346
## 3j4_q_403_S268_R1_001    -0.256017453 -0.912205830 -0.321592838 -0.824180372
## 3k1_q_411_S273_R1_001     1.113510333  0.569946172  0.153459978 -0.143470389
## 3k6_q_417_S278_R1_001    -0.452979794  0.226447792  0.182690271 -0.403184195
## 3k7_q_419_S279_R1_001     0.212632376 -0.760135990 -0.848613902 -0.546614988
## 3k8_q_420_S280_R1_001    -0.241773636 -0.050092535  0.430185733  0.092280541
## 3l1_q_421_S281_R1_001     0.185762457  0.165484548  0.344826889  0.400341980
## 3l4_q_426_S284_R1_001    -0.048090780 -1.117707933 -0.555011886 -0.821729061
## 3l6_q_428_S286_R1_001    -0.567873511  1.405126201 -0.096143189 -0.293971862
## 3l8_q_432_S288_R1_001    -0.478421532  0.085402829  0.308283750 -0.373295394

Plotting the centroids to see how they behave: tidyverse version

library(glue)
# adding sample description to data
  data.sample<-kClustcentroids.any.trtlive.8 %>% as_tibble(rownames="sample") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse,by="sample") %>% 
  inner_join(cluster.any.trtlive.8.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
  data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>%  inner_join(
    data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>%  dplyr::slice(rep(1:768)[!duplicated(.$group.cluster)]) 
# plot
p8.any.trtlive<-  ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") + 
  geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
  facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="scaled expression level") 
p8.any.trtlive

ggsave(p8.any.trtlive,file="../output/Twoafternoon.any.trtlive.DEG.Kmean.8clusters.png",width=11,height=8)

under construction

Let’s perform the actual clsutering using K=15:

set.seed(20)
kClust.any.trtlive.15 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], centers=15, nstart = 1000, iter.max = 20)
kClusters.any.trtlive.15 <- kClust.any.trtlive.15$cluster
# number of clusters
cluster.any.trtlive.15.num<-tibble(cluster=kClusters.any.trtlive.15) %>% group_by(cluster) %>% summarize(n=n()) 
cluster.any.trtlive.15.num$cluster<-as.character(cluster.any.trtlive.15.num$cluster) # classic way
cluster.any.trtlive.15.num

Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i

clust.centroid = function(i, dat, clusters) {
  ind = (clusters == i)
  colMeans(dat[ind,])
}

kClustcentroids.any.trtlive.15 <- sapply(levels(factor(kClusters.any.trtlive.15)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], kClusters.any.trtlive.15)
kClustcentroids.any.trtlive.15
##                                    1            2           3            4
## 1a1_q_002_S1_R1_001       0.53634127 -0.368342751 -0.19308652 -0.994353765
## 1a3_q_004_S3_R1_001      -0.72943329 -0.364642600 -0.73384747 -0.449043022
## 1a7_q_007_d8_S7_R1_001   -0.34271346 -0.002591793 -0.18433094 -0.412446658
## 1a8_q_008_d8_S8_R1_001   -0.25098727 -0.479632975 -0.58712815 -0.436360551
## 1c6_q_028_S22_R1_001     -0.20647922 -0.519653925 -0.47851481  0.424046487
## 1d3_q_038_S27_R1_001     -0.45237747 -0.096021258 -0.33381386 -0.210028815
## 1d5_q_042_S29_R1_001     -0.37921457 -0.316640471 -0.48207801  0.213537259
## 1e4_q_055_S36_R1_001     -0.33018383 -0.352610696 -0.48854598  0.786627982
## 1e5_q_056_S37_R1_001     -0.18964695  2.326526590  1.31284361  3.664484291
## 1e6_q_057_S38_R1_001      0.35748840  0.439742320  0.61379835  0.811489161
## 1e8_q_059_S40_R1_001     -0.57344042 -0.205920280 -0.47119015  0.180898074
## 1f1_q_060_S41_R1_001     -0.14922359 -0.472678895 -0.55313292  0.236231842
## 1f2_q_062_S42_R1_001      0.63651588 -0.509246429 -0.31707501 -0.661529654
## 1f3_q_065_S43_R1_001     -0.23110446 -0.404863815 -0.57974285 -0.066022718
## 1f4_q_066_S44_R1_001     -0.35278525  0.962937779  0.43872224  0.021524021
## 1f5_q_068_S45_R1_001     -0.25588031  1.378049475  1.12313746  1.210274117
## 1g3_q_075_S51_R1_001     -0.44792348 -0.568292653 -0.82905325 -0.890478254
## 1g7_q_082_S55_R1_001      0.72633495 -0.080637410  0.73228387 -0.795123279
## 1h3_q_086_d8_S59_R1_001  -0.29920071 -0.513618704 -0.65232001 -0.527459386
## 1h4_q_090_d8_S60_R1_001  -0.24548146 -0.353938969 -0.50771555 -0.610790228
## 1i2_q_104_S66_R1_001     -0.30096486  2.224491882  1.35764269  1.734753262
## 1i3_q_105_S67_R1_001     -0.46617942 -0.582322052 -0.87902057 -0.221836381
## 1i7_q_110_S71_R1_001     -0.46126166 -0.557292215 -0.70614487 -0.249459795
## 1j3_q_114_S75_R1_001     -0.21487978 -0.353694579 -0.38865178 -0.267270835
## 1j4_q_115_S76_R1_001      0.16895152 -0.424543497 -0.48561996 -0.877063575
## 1j6_q_117_S78_R1_001      0.52681286 -0.289151902 -0.18113877 -0.176544625
## 1j7_q_118_S79_R1_001     -0.37906708 -0.183318695 -0.24738837 -0.145767362
## 1k2_q_121_S82_R1_001     -0.43674604 -0.230598214 -0.30521423 -0.442600889
## 1k4_q_127_S84_R1_001     -0.07200795  0.090102170  0.08678698 -0.376519392
## 1k5_q_128_S85_R1_001     -0.33020048 -0.359593501 -0.46034696 -0.076297079
## 1k6_q_130_S86_R1_001     -0.43061924 -0.398830427 -0.53666735 -0.371111514
## 1k8_q_135_S88_R1_001      0.83710513 -0.631865537 -0.39409446 -0.893247336
## 2a1_q_146_S97_R1_001     -0.49258121  1.995394581  0.93507191  1.953704208
## 2a7_q_153_S103_R1_001    -0.49454133 -0.100675057 -0.26495917 -0.488492153
## 2b1_q_156_S105_R1_001    -0.83180307  0.225015925 -0.53444092  0.573982476
## 2b2_q_158_S106_R1_001    -0.41535292 -0.425755847 -0.74919915  1.606934793
## 2b3_q_160_S107_R1_001     0.46237604 -0.069628419  0.02848632 -0.744101263
## 2b7_q_165_S111_R1_001     1.31756665  0.105270613  0.77115818  0.350984464
## 2b8_q_167_S112_R1_001    -0.72244868  0.264335569 -0.15019604 -0.167280601
## 2c1_q_168_S113_R1_001    -0.82376851 -0.396974197 -0.93171407 -0.193799398
## 2c3_q_171_S115_R1_001    -0.03412986  0.416163950  0.92555485 -0.521674029
## 2c4_q_172_d8_S116_R1_001 -0.72379391 -0.505848865 -0.86466264 -0.433987397
## 2c5_q_173_S117_R1_001    -0.39482248 -0.272782635 -0.59216999  0.490317993
## 2c7_q_178_S119_R1_001    -0.03883303 -0.157560964 -0.28476500 -0.216851306
## 2d2_q_181_S122_R1_001    -0.42054894 -0.467867747 -0.78208875 -0.181534715
## 2d7_q_189_S127_R1_001    -0.41105157 -0.418810528 -0.69233636 -0.011829492
## 2e6_q_200_d8_S134_R1_001 -0.41698501 -0.590588987 -0.91296499 -0.180346610
## 2f6_q_212_S142_R1_001    -0.76474963  0.843697180  0.08605002  0.011474899
## 2g1_q_217_S145_R1_001    -0.42226502  1.182073561  1.08383065  0.082266136
## 2g4_q_221_S148_R1_001    -0.31375669 -0.053062100 -0.02535975  0.162501139
## 2g8_q_229_S152_R1_001    -0.50942934 -0.658486487 -1.02230595 -0.518342486
## 2h1_q_230_S153_R1_001     0.17784394  1.754882779  1.61959457  0.562339030
## 2h4_q_235_S156_R1_001    -0.55138662 -0.646602378 -0.84413910 -0.903795479
## 2h7_q_238_S159_R1_001    -0.33843179 -0.444108789 -0.64674140  0.343929510
## 2i1_q_242_S161_R1_001     0.85864638 -0.520379233 -0.05262601 -0.615346016
## 2i4_q_246_S164_R1_001    -0.35252458 -0.387697794 -0.53293408 -0.291039810
## 2i5_q_247_S165_R1_001     0.33955450 -0.594906691 -0.39929108 -0.972466316
## 2i6_q_247_d8_S166_R1_001 -0.31101400 -0.545855723 -0.68347298 -0.689561140
## 2j1_q_250_S169_R1_001    -0.43318644  0.377964052  0.15656346 -0.121326028
## 2j4_q_254_S172_R1_001     0.86305001 -0.499530146  0.01910866 -0.838443237
## 2j6_q_257_S174_R1_001    -0.40616922 -0.375301396 -0.72059988  0.097759975
## 2j7_q_259_d8_S175_R1_001 -0.43558680  0.113928380 -0.20772906 -0.132822705
## 2k4_q_267_S180_R1_001    -0.43105037 -0.047631737 -0.22701649  0.008246768
## 2l4_q_282_S188_R1_001    -0.63315160  2.155665841  0.59732802  0.742891468
## 3a1_q_289_S193_R1_001    -0.41190937 -0.077217908 -0.07948568 -0.455978853
## 3a4_q_294_S196_R1_001     0.78398092 -0.437152810 -0.11872224 -0.767081698
## 3b5_q_305_S205_R1_001    -0.72196113  0.007554792 -0.35543867 -0.089257169
## 3c2_q_312_S210_R1_001    -0.33162499 -0.472352366 -0.55226939  0.023396453
## 3c5_q_317_S213_R1_001     0.57641327  0.348782501  0.44848551 -0.313256112
## 3c7_q_322_S215_R1_001    -0.39624555  1.305823671  1.23561479  0.731686825
## 3d2_q_325_S218_R1_001    -0.54763350 -0.604152593 -0.91656643 -0.126705755
## 3d6_q_333_S222_R1_001    -0.56314684 -0.517024465 -0.83779909  0.428910295
## 3e2_q_342_S226_R1_001    -0.26427365 -0.113558620  0.01849752 -0.403840155
## 3e3_q_343_S227_R1_001    -0.27599098  1.019333996  0.97316719  0.242189942
## 3e5_q_348_S229_R1_001    -0.43796289 -0.380070760 -0.64120742 -0.065094411
## 3f2_q_353_d8_S234_R1_001  0.02692133 -0.390669979 -0.29049338 -0.527010247
## 3f4_q_356_S236_R1_001    -0.22978766 -0.434507970 -0.67128439 -0.359061548
## 3f5_q_358_S237_R1_001    -0.38407562 -0.565993884 -0.68246776 -0.041817183
## 3f7_q_360_S239_R1_001     0.47297397 -0.496165576 -0.24549460 -0.681585815
## 3f8_q_360_d8_S240_R1_001 -0.57562925 -0.620894611 -1.04225114 -0.077580335
## 3h1_q_372_S249_R1_001    -0.59853361 -0.218548574 -0.56850185  0.831337469
## 3h2_q_374_S250_R1_001    -0.48519100 -0.493134961 -0.90250285 -0.088557852
## 3h3_q_375_d8_S251_R1_001 -0.52797275 -0.611792527 -0.98082736 -0.589994111
## 3h5_q_377_S253_R1_001    -0.33684769  1.487058308  1.11120500  0.773878891
## 3h8_q_387_d8_S256_R1_001 -0.68604048  0.613742015 -0.01976978 -0.205550669
## 3i3_q_391_S259_R1_001     0.05820257  1.550274445  1.64463447  0.479510876
## 3i4_q_392_S260_R1_001    -0.72802460  0.249221374 -0.25598967 -0.298001019
## 3j4_q_403_S268_R1_001     0.12505811 -0.382590734 -0.34636371 -0.879887089
## 3k1_q_411_S273_R1_001    -0.58494815  2.351175657  0.72105754  0.775057112
## 3k6_q_417_S278_R1_001    -0.32989649 -0.634456188 -0.86954787  0.330504035
## 3k7_q_419_S279_R1_001     0.80181082 -0.521094320 -0.20318419 -0.732645594
## 3k8_q_420_S280_R1_001    -0.40191910 -0.347992086 -0.62256802 -0.167776164
## 3l1_q_421_S281_R1_001    -0.80233598 -0.063712225 -0.48436207  0.213300348
## 3l4_q_426_S284_R1_001     0.55970821 -0.421366161 -0.05877164 -1.172096416
## 3l6_q_428_S286_R1_001    -0.56160275 -0.465935417 -0.98668096  1.448146801
## 3l8_q_432_S288_R1_001    -0.60491821 -0.514935554 -0.88741547  0.044958636
##                                     5            6             7           8
## 1a1_q_002_S1_R1_001       0.539067262  0.677494289 -0.2143971100 -0.53950313
## 1a3_q_004_S3_R1_001       0.040619088  0.087791367 -0.3582435129  0.18574802
## 1a7_q_007_d8_S7_R1_001    0.404652920 -0.031086513 -0.1696025316  0.02199784
## 1a8_q_008_d8_S8_R1_001    0.400582886  0.358359368 -0.3610603150  0.14468767
## 1c6_q_028_S22_R1_001     -0.204006982  0.146627590 -0.1992854348  0.49896429
## 1d3_q_038_S27_R1_001      0.116043377 -0.009315873 -0.2560808754 -0.17461624
## 1d5_q_042_S29_R1_001     -0.149526915  0.439373327 -0.1656386329  0.42862123
## 1e4_q_055_S36_R1_001      0.038224369  0.100564248 -0.2286539037  0.63169311
## 1e5_q_056_S37_R1_001     -1.166943317  1.094709970  3.3885981462 -0.41410581
## 1e6_q_057_S38_R1_001     -0.576838712  1.277657687  1.7145476347 -0.38727769
## 1e8_q_059_S40_R1_001     -0.514146497  0.434065043 -0.0379737613  0.11106249
## 1f1_q_060_S41_R1_001     -0.179456731  0.842879312 -0.1518483882  0.53979832
## 1f2_q_062_S42_R1_001      0.636722011  1.547085329 -0.0563625467 -0.67806700
## 1f3_q_065_S43_R1_001      0.541615938  0.435108618 -0.3243500327 -0.08582989
## 1f4_q_066_S44_R1_001      0.101281733 -0.284385993  0.5134341137 -0.54993343
## 1f5_q_068_S45_R1_001     -0.737451460  0.449208744  1.6116234610 -0.61407054
## 1g3_q_075_S51_R1_001      0.398887174  0.381907270 -0.5295963426 -0.23540887
## 1g7_q_082_S55_R1_001     -0.290587913  1.082518078  0.4897295523 -0.69693499
## 1h3_q_086_d8_S59_R1_001   0.588383843 -0.024620396 -0.6454940950 -0.03703038
## 1h4_q_090_d8_S60_R1_001  -0.069585079  0.246217845 -0.2943928720 -0.16844009
## 1i2_q_104_S66_R1_001     -0.994658322  0.945545429  3.2722204713 -0.60872965
## 1i3_q_105_S67_R1_001      0.193382870  0.116934030 -0.5551025380  0.36900677
## 1i7_q_110_S71_R1_001     -0.215146858  0.294320759 -0.3941437161  0.09636140
## 1j3_q_114_S75_R1_001      0.113724542  0.675387520 -0.1559093732  0.13478917
## 1j4_q_115_S76_R1_001      1.371295711  0.391177718 -0.3539140389 -0.55810201
## 1j6_q_117_S78_R1_001     -0.018087061  1.899552830  0.4812962735 -0.58313485
## 1j7_q_118_S79_R1_001      0.398447111  0.022725698 -0.2083738408 -0.20355529
## 1k2_q_121_S82_R1_001     -0.561743095  0.454143394  0.1808581603  0.01105252
## 1k4_q_127_S84_R1_001      0.664380979 -0.244485916 -0.0405808734 -0.17140637
## 1k5_q_128_S85_R1_001     -0.032812063  0.095011268 -0.3007154943  0.38318763
## 1k6_q_130_S86_R1_001      1.157261715 -0.272860962 -0.5994254617 -0.15636388
## 1k8_q_135_S88_R1_001      0.215496211  1.599051388 -0.0431255889 -0.63605575
## 2a1_q_146_S97_R1_001     -0.189928258 -0.168414613  1.5414923023 -0.39197395
## 2a7_q_153_S103_R1_001    -0.343985568  0.420922001  0.0636925973 -0.05242950
## 2b1_q_156_S105_R1_001     0.010042769 -0.186432095 -0.1793395828  0.40173558
## 2b2_q_158_S106_R1_001    -0.120295194  0.053978239 -0.4569879346  0.72588318
## 2b3_q_160_S107_R1_001     0.671494280  0.786073999  0.1709272829 -0.62453585
## 2b7_q_165_S111_R1_001    -0.022319502  2.130054528  1.4027439987 -0.51657160
## 2b8_q_167_S112_R1_001     0.239840129 -0.356583165 -0.1643451768 -0.14477770
## 2c1_q_168_S113_R1_001     0.025999566 -0.182440550 -0.6684025652  0.36193943
## 2c3_q_171_S115_R1_001    -1.116136433  1.234948983  1.7670587467 -0.73326175
## 2c4_q_172_d8_S116_R1_001 -0.119292291 -0.005957709 -0.5402171943 -0.12156965
## 2c5_q_173_S117_R1_001    -0.121020988 -0.170402398 -0.4208972571  0.72028246
## 2c7_q_178_S119_R1_001     0.232869734  0.603297851 -0.0004349371 -0.35876488
## 2d2_q_181_S122_R1_001     0.827200131 -0.033394630 -0.6554408451 -0.15945277
## 2d7_q_189_S127_R1_001     0.010027508  0.211292804 -0.2572713126  0.28735894
## 2e6_q_200_d8_S134_R1_001  0.905203978  0.212931528 -0.6618989220  0.18594488
## 2f6_q_212_S142_R1_001     0.191965931 -0.645735143  0.2388280490 -0.26573640
## 2g1_q_217_S145_R1_001    -0.665440222  0.382282943  1.5592669882 -0.70549001
## 2g4_q_221_S148_R1_001    -0.617580643  0.881305005  0.5430941259  0.20265209
## 2g8_q_229_S152_R1_001     0.194382292  0.010841285 -0.7538151471  0.37799954
## 2h1_q_230_S153_R1_001    -0.339713651  0.722928688  1.8612743117 -0.52856003
## 2h4_q_235_S156_R1_001     0.217532549  0.253942768 -0.5714849028 -0.17463318
## 2h7_q_238_S159_R1_001    -0.005392722 -0.044121166 -0.4550573626  0.85848329
## 2i1_q_242_S161_R1_001    -0.087577491  2.172297235  0.5026312459 -0.55544775
## 2i4_q_246_S164_R1_001     0.481776312  0.164120777 -0.3786469392  0.11271458
## 2i5_q_247_S165_R1_001     0.591413625  0.796099359 -0.5654830811 -0.49802762
## 2i6_q_247_d8_S166_R1_001  0.419115318  0.121352879 -0.6368115704  0.04769499
## 2j1_q_250_S169_R1_001     0.266940037 -0.138808875  0.0852169509 -0.34757126
## 2j4_q_254_S172_R1_001    -0.247674196  2.356729690  0.3845039887 -0.53147570
## 2j6_q_257_S174_R1_001     0.616181740  0.451494655 -0.4330665746 -0.05939335
## 2j7_q_259_d8_S175_R1_001  0.181718026 -0.127443808 -0.0964415552  0.08629102
## 2k4_q_267_S180_R1_001     0.018271534  0.099985999 -0.0602999156  0.35072274
## 2l4_q_282_S188_R1_001    -0.018903608 -0.421113051  1.0788319131 -0.45586458
## 3a1_q_289_S193_R1_001    -0.748195930  0.721441433  0.4615100012 -0.22769141
## 3a4_q_294_S196_R1_001     0.195809731  1.988117850  0.3150733544 -0.51684804
## 3b5_q_305_S205_R1_001     0.721107835 -0.320865476 -0.2349890936 -0.28712353
## 3c2_q_312_S210_R1_001    -0.320220045  0.552212065 -0.1309726455  0.32984993
## 3c5_q_317_S213_R1_001    -0.279544954  2.330074137  1.3635770042 -0.57412300
## 3c7_q_322_S215_R1_001    -1.117137075  0.888030413  2.3605212474 -0.61736477
## 3d2_q_325_S218_R1_001     0.326818325  0.109099816 -0.6696734461  0.54478268
## 3d6_q_333_S222_R1_001    -0.186318607  0.181339785 -0.3041718360  0.36235176
## 3e2_q_342_S226_R1_001     0.324596926  0.297569987 -0.0304286400 -0.43430212
## 3e3_q_343_S227_R1_001    -0.215677112  0.483583365  1.1002692355 -0.51583860
## 3e5_q_348_S229_R1_001    -0.307546344  0.336794663 -0.2084058157  0.47090159
## 3f2_q_353_d8_S234_R1_001  0.012694555  0.782067317 -0.1536500855 -0.02116768
## 3f4_q_356_S236_R1_001     1.269137687  0.339233628 -0.4811330872 -0.22187063
## 3f5_q_358_S237_R1_001     0.375509971  0.169573528 -0.5430411589  0.33714631
## 3f7_q_360_S239_R1_001     0.283998280  1.520244716  0.1363744139 -0.54242632
## 3f8_q_360_d8_S240_R1_001  0.238835213  0.398441046 -0.6031815559  0.13342025
## 3h1_q_372_S249_R1_001     0.234848660 -0.249941417 -0.1299292919  0.46654298
## 3h2_q_374_S250_R1_001     0.998622336 -0.065742108 -0.6122293991 -0.20092775
## 3h3_q_375_d8_S251_R1_001  0.679232827  0.160034724 -0.7759473874 -0.12112787
## 3h5_q_377_S253_R1_001    -0.794322784  0.614481480  2.2312786111 -0.65460516
## 3h8_q_387_d8_S256_R1_001  0.160166582 -0.463193150  0.0531193210  0.01321435
## 3i3_q_391_S259_R1_001    -0.345391391  0.762403234  2.3500257727 -0.73101036
## 3i4_q_392_S260_R1_001     0.112416898 -0.251060874  0.1379385157 -0.38609698
## 3j4_q_403_S268_R1_001     1.008316393  0.078990218 -0.3432753436 -0.65163019
## 3k1_q_411_S273_R1_001     0.055231303 -0.385093836  1.2083381062 -0.42468951
## 3k6_q_417_S278_R1_001     0.312287405  0.268042088 -0.5810819626  0.27613925
## 3k7_q_419_S279_R1_001     0.448564517  1.641204732  0.2670149207 -0.49271413
## 3k8_q_420_S280_R1_001     0.308914699 -0.049097539 -0.2691750233  0.54461119
## 3l1_q_421_S281_R1_001    -0.358518811 -0.177368726  0.1294985411 -0.14216688
## 3l4_q_426_S284_R1_001     0.700467462  0.958371024 -0.2305557163 -0.74678793
## 3l6_q_428_S286_R1_001     0.130651957 -0.467641297 -0.6202664495  0.62938603
## 3l8_q_432_S288_R1_001     0.374263459 -0.423449758 -0.6194770877  0.26929507
##                                     9            10          11          12
## 1a1_q_002_S1_R1_001      -0.093282280 -5.176534e-02 -0.34883557 -0.56861725
## 1a3_q_004_S3_R1_001      -0.116079106 -1.247636e-01  0.94170519  0.65243451
## 1a7_q_007_d8_S7_R1_001   -0.037159747  1.462772e-01 -0.06335732  0.78312661
## 1a8_q_008_d8_S8_R1_001   -0.081197186 -1.466626e-01  0.10727360  0.49219358
## 1c6_q_028_S22_R1_001     -0.069078099  4.886612e-02  0.18259940 -0.27710161
## 1d3_q_038_S27_R1_001     -0.048745253  4.805125e-01 -0.19548676  0.70625374
## 1d5_q_042_S29_R1_001     -0.055987920  3.489170e-01  0.65116637 -0.05901000
## 1e4_q_055_S36_R1_001     -0.052759764  4.179516e-01  0.37093696  0.05516222
## 1e5_q_056_S37_R1_001      0.169004284  2.519392e+00  0.70828887 -0.32746835
## 1e6_q_057_S38_R1_001      0.052503586  5.905900e-01  0.62108269 -0.55602494
## 1e8_q_059_S40_R1_001     -0.031474785  5.515744e-02  1.08996863  0.49458140
## 1f1_q_060_S41_R1_001     -0.065746809  1.167396e-01  0.62615557 -0.54324202
## 1f2_q_062_S42_R1_001     -0.055717381  9.996102e-01 -0.35957937 -1.24154322
## 1f3_q_065_S43_R1_001     -0.086402746  2.539461e-01 -0.19184707  0.05739724
## 1f4_q_066_S44_R1_001     -0.007249898  1.200481e+00 -0.30782811  0.01419741
## 1f5_q_068_S45_R1_001      0.079288396  1.615407e+00  0.15195289  0.14481544
## 1g3_q_075_S51_R1_001     -0.151793393 -3.138769e-01 -0.02858004  0.64853393
## 1g7_q_082_S55_R1_001      0.044371834  1.145119e+00 -0.26891610 -0.40455248
## 1h3_q_086_d8_S59_R1_001  -0.124803185 -2.154913e-01 -0.23139238  0.68496147
## 1h4_q_090_d8_S60_R1_001  -0.102455314  3.308598e-01 -0.04576717  0.35472691
## 1i2_q_104_S66_R1_001      0.133690497  2.248935e+00  0.84169859 -0.52643463
## 1i3_q_105_S67_R1_001     -0.130658053 -1.482697e-01  0.22599946 -0.03739119
## 1i7_q_110_S71_R1_001     -0.083746561 -9.210903e-02  0.62034624 -0.09702181
## 1j3_q_114_S75_R1_001     -0.077501458  1.515651e-01  0.27231685  0.12428751
## 1j4_q_115_S76_R1_001     -0.133011108  2.020132e-01 -0.49680674 -0.60262090
## 1j6_q_117_S78_R1_001     -0.112935758  8.220863e-01  0.11376531 -1.02032081
## 1j7_q_118_S79_R1_001     -0.052598136  5.079476e-01 -0.08361925  0.66913647
## 1k2_q_121_S82_R1_001     -0.053606485  2.493880e-01  1.82498751  0.47561883
## 1k4_q_127_S84_R1_001     -0.077257959  7.125345e-01 -0.62147132 -0.08680889
## 1k5_q_128_S85_R1_001     -0.070930687 -1.359256e-01  0.14418892  0.04562564
## 1k6_q_130_S86_R1_001     -0.127907807 -1.448528e-01 -0.58628876  0.31914361
## 1k8_q_135_S88_R1_001     -0.124636749  2.706583e-01 -0.50408566 -1.34011699
## 2a1_q_146_S97_R1_001      0.098963253  1.840869e+00 -0.37214945  0.09847402
## 2a7_q_153_S103_R1_001    -0.060308288  2.373909e-01  0.29550810  1.08427959
## 2b1_q_156_S105_R1_001     0.020417496  5.165598e-01  1.34722427  0.10143625
## 2b2_q_158_S106_R1_001    -0.044175479  2.729903e-02  0.26637128 -0.14273100
## 2b3_q_160_S107_R1_001    -0.085407624  9.417926e-01 -0.63745644 -0.82364966
## 2b7_q_165_S111_R1_001    -0.004654114  1.024787e+00 -0.09374060 -1.19312629
## 2b8_q_167_S112_R1_001    -0.033905863  5.819147e-01 -0.16182604  1.39743279
## 2c1_q_168_S113_R1_001    -0.124475165  6.016021e-02 -0.11215911  0.44920249
## 2c3_q_171_S115_R1_001    -0.034510354  1.367030e+00  0.56353121 -0.41044940
## 2c4_q_172_d8_S116_R1_001 -0.095251138  1.365371e-01 -0.02622658  0.34165606
## 2c5_q_173_S117_R1_001    -0.035500637  8.756770e-03 -0.09356680  0.61272865
## 2c7_q_178_S119_R1_001    -0.016560151  5.200298e-01 -0.24421700 -0.65648135
## 2d2_q_181_S122_R1_001    -0.149532783  1.434138e-01 -0.43031295  0.35164777
## 2d7_q_189_S127_R1_001    -0.096918263  2.050379e-01  1.08077443  0.53148342
## 2e6_q_200_d8_S134_R1_001 -0.134266348 -3.278021e-01 -0.20291925  0.12694939
## 2f6_q_212_S142_R1_001    -0.020883054  8.998193e-01 -0.48309896  0.10393443
## 2g1_q_217_S145_R1_001     0.000452660  1.231264e+00  0.04999153 -0.04108279
## 2g4_q_221_S148_R1_001    -0.010577161  8.197523e-01  2.38328414  0.09548876
## 2g8_q_229_S152_R1_001    -0.160633270 -2.690822e-01  0.04643640 -0.11938996
## 2h1_q_230_S153_R1_001     0.089167007  2.501200e+00 -0.28724704 -0.50284244
## 2h4_q_235_S156_R1_001    -0.139950424 -3.439501e-01  0.06441462  0.29613410
## 2h7_q_238_S159_R1_001    -0.092584396 -2.518427e-02  0.06912832  0.06841295
## 2i1_q_242_S161_R1_001    -0.111334432  4.316169e-01  0.16525850 -0.98617977
## 2i4_q_246_S164_R1_001    -0.049676881  4.471470e-01 -0.26878097  0.05834035
## 2i5_q_247_S165_R1_001    -0.161766417 -7.010678e-02 -0.63663703 -0.65036747
## 2i6_q_247_d8_S166_R1_001 -0.131000151 -1.220562e-01 -0.40779877  0.02149601
## 2j1_q_250_S169_R1_001    -0.020841218  7.905999e-01 -0.21862400  1.03404855
## 2j4_q_254_S172_R1_001    -0.065964790  6.212782e-01  0.23359327 -0.95166977
## 2j6_q_257_S174_R1_001    -0.096266234  3.150204e-01 -0.37057896  0.55344924
## 2j7_q_259_d8_S175_R1_001 -0.034791105  2.682403e-01 -0.19729897  0.78426642
## 2k4_q_267_S180_R1_001    -0.056755778  3.372559e-01  0.20441459  0.52875164
## 2l4_q_282_S188_R1_001     0.046801924  1.049270e+00 -0.32111711  0.55330024
## 3a1_q_289_S193_R1_001    -0.016283330  7.839718e-01  3.42570105  0.63719837
## 3a4_q_294_S196_R1_001    -0.055028889  5.579441e-01 -0.23569255 -0.96901947
## 3b5_q_305_S205_R1_001    -0.080026792  5.550273e-01 -0.29669335  1.14746830
## 3c2_q_312_S210_R1_001    -0.096335989  4.966010e-01  0.48032752 -0.03636153
## 3c5_q_317_S213_R1_001     0.067465219  1.374733e+00  2.19094503 -0.82224510
## 3c7_q_322_S215_R1_001     0.060876243  1.269154e+00  0.92430894  0.18564491
## 3d2_q_325_S218_R1_001    -0.138484576 -6.384438e-02 -0.11976292  0.02673022
## 3d6_q_333_S222_R1_001    -0.104406092 -8.661356e-02  1.48500087  0.20381363
## 3e2_q_342_S226_R1_001    -0.095560903  6.994399e-01 -0.26723222  0.13659555
## 3e3_q_343_S227_R1_001     0.032227089  1.680994e+00 -0.35020467  0.15300660
## 3e5_q_348_S229_R1_001    -0.081650992  4.287000e-02  1.34357814 -0.17930351
## 3f2_q_353_d8_S234_R1_001 -0.064847877  3.886854e-01 -0.08032344  0.44572455
## 3f4_q_356_S236_R1_001    -0.150233467  1.266909e-01 -0.60459470  0.33045496
## 3f5_q_358_S237_R1_001    -0.123950084  3.009540e-02 -0.01938314 -0.05089865
## 3f7_q_360_S239_R1_001    -0.139060952  4.220418e-01 -0.12443001 -1.12864352
## 3f8_q_360_d8_S240_R1_001 -0.111862430 -1.239111e-01 -0.22884629 -0.30464463
## 3h1_q_372_S249_R1_001    -0.072104201  3.296868e-01  0.34024577 -0.03622522
## 3h2_q_374_S250_R1_001    -0.116032527  9.670143e-02 -0.31180552  0.50418150
## 3h3_q_375_d8_S251_R1_001 -0.140375672 -2.074910e-01 -0.39846030  0.62424176
## 3h5_q_377_S253_R1_001     0.018970396  1.318443e+00  0.22277241  0.02751369
## 3h8_q_387_d8_S256_R1_001 -0.014324360  3.825759e-01 -0.30853976  1.19531173
## 3i3_q_391_S259_R1_001     0.057304289  2.326290e+00 -0.35371172 -0.43305662
## 3i4_q_392_S260_R1_001     0.028287038  7.198527e-01  0.12634536  0.85456714
## 3j4_q_403_S268_R1_001    -0.139587775  1.941833e-05 -0.65101120 -0.58761576
## 3k1_q_411_S273_R1_001     0.076548669  9.836930e-01 -0.40196052  0.46398294
## 3k6_q_417_S278_R1_001    -0.130312190 -1.160545e-01 -0.17440373  0.01745052
## 3k7_q_419_S279_R1_001    -0.106135258  2.449997e-01 -0.31526122 -1.26394061
## 3k8_q_420_S280_R1_001    -0.073679357 -1.515619e-01  0.30335566  0.23833979
## 3l1_q_421_S281_R1_001    -0.087011670  3.735254e-01  0.48761750  0.65817262
## 3l4_q_426_S284_R1_001    -0.141674050  3.414606e-01 -0.66480069 -0.82335483
## 3l6_q_428_S286_R1_001    -0.106064562 -2.877813e-01 -0.07293916 -0.35535915
## 3l8_q_432_S288_R1_001    -0.128205289 -1.151206e-01 -0.21836920  0.15015978
##                                    13           14          15
## 1a1_q_002_S1_R1_001      -0.618056948 -0.317859857 -0.51652672
## 1a3_q_004_S3_R1_001       0.061626282 -0.315313752 -0.67408007
## 1a7_q_007_d8_S7_R1_001    0.143372033  0.066701371 -0.05687140
## 1a8_q_008_d8_S8_R1_001   -0.161627150 -0.378009042 -0.65386741
## 1c6_q_028_S22_R1_001     -0.168973900 -0.341682014 -0.49404419
## 1d3_q_038_S27_R1_001     -0.263670768  0.002871791 -0.43285776
## 1d5_q_042_S29_R1_001      0.047844090 -0.210012529 -0.35591706
## 1e4_q_055_S36_R1_001     -0.081467264 -0.226135370 -0.28430667
## 1e5_q_056_S37_R1_001      2.058234646  0.791298137  0.27423758
## 1e6_q_057_S38_R1_001      1.204780543  0.164863565 -0.05210617
## 1e8_q_059_S40_R1_001      0.575539815 -0.152007939 -0.26483685
## 1f1_q_060_S41_R1_001      0.002132848 -0.330578777 -0.58185519
## 1f2_q_062_S42_R1_001     -0.619960180 -0.524693407 -0.99049212
## 1f3_q_065_S43_R1_001     -0.417111446 -0.317789717 -0.74568299
## 1f4_q_066_S44_R1_001     -0.019996496  0.573943917 -0.05858046
## 1f5_q_068_S45_R1_001      0.876268565  0.738361579  0.15867519
## 1g3_q_075_S51_R1_001     -0.472540766 -0.419812091 -0.91213395
## 1g7_q_082_S55_R1_001      0.238462433  0.035713957 -0.02035499
## 1h3_q_086_d8_S59_R1_001  -0.493979152 -0.330641528 -0.62490995
## 1h4_q_090_d8_S60_R1_001  -0.356539576 -0.251291104 -0.58832398
## 1i2_q_104_S66_R1_001      1.778294746  0.789069963  0.16460413
## 1i3_q_105_S67_R1_001     -0.517332376 -0.437824820 -0.73841387
## 1i7_q_110_S71_R1_001     -0.047185778 -0.394919454 -0.62227188
## 1j3_q_114_S75_R1_001      0.032600834 -0.317618183 -0.45804551
## 1j4_q_115_S76_R1_001     -0.799092586 -0.411244255 -0.81179802
## 1j6_q_117_S78_R1_001     -0.149304842 -0.394074424 -0.86516746
## 1j7_q_118_S79_R1_001     -0.170127437 -0.105106591 -0.54878279
## 1k2_q_121_S82_R1_001      0.725768695 -0.165199188 -0.21906889
## 1k4_q_127_S84_R1_001     -0.329831274  0.071400807 -0.20226734
## 1k5_q_128_S85_R1_001     -0.122775559 -0.244899019 -0.46317531
## 1k6_q_130_S86_R1_001     -0.741065646 -0.239531004 -0.64522512
## 1k8_q_135_S88_R1_001     -0.847930010 -0.531109183 -0.87106276
## 2a1_q_146_S97_R1_001      0.480166377  0.974063717  0.27681064
## 2a7_q_153_S103_R1_001     0.430094127 -0.131868939 -0.34328247
## 2b1_q_156_S105_R1_001     0.208616945  0.266985362 -0.17465895
## 2b2_q_158_S106_R1_001    -0.451346791 -0.184238273 -0.22039511
## 2b3_q_160_S107_R1_001    -0.615265372 -0.172698429 -0.68921737
## 2b7_q_165_S111_R1_001     0.342997505 -0.050640576 -0.37665305
## 2b8_q_167_S112_R1_001    -0.133013899  0.237574812 -0.23223730
## 2c1_q_168_S113_R1_001    -0.733247860 -0.343548571 -0.83934275
## 2c3_q_171_S115_R1_001     1.267037813  0.114552079 -0.02903776
## 2c4_q_172_d8_S116_R1_001 -0.667289382 -0.328728745 -0.90195783
## 2c5_q_173_S117_R1_001    -0.356786155 -0.115190660 -0.38269631
## 2c7_q_178_S119_R1_001    -0.460411927 -0.156163676 -0.78706864
## 2d2_q_181_S122_R1_001    -0.833896551 -0.311534038 -0.82254930
## 2d7_q_189_S127_R1_001    -0.046002912 -0.295189918 -0.55149890
## 2e6_q_200_d8_S134_R1_001 -0.756098631 -0.400403026 -0.74454360
## 2f6_q_212_S142_R1_001    -0.241216889  0.492485915 -0.24030585
## 2g1_q_217_S145_R1_001     0.812910112  0.593909770  0.05125145
## 2g4_q_221_S148_R1_001     1.291466350  0.005228630 -0.15643011
## 2g8_q_229_S152_R1_001    -0.766211856 -0.467235003 -0.85053595
## 2h1_q_230_S153_R1_001     0.562698016  0.876327332  0.12806370
## 2h4_q_235_S156_R1_001    -0.485992956 -0.436662428 -0.77300581
## 2h7_q_238_S159_R1_001    -0.369033053 -0.268076032 -0.37383992
## 2i1_q_242_S161_R1_001     0.038509046 -0.459760067 -0.77776313
## 2i4_q_246_S164_R1_001    -0.533630897 -0.229712539 -0.57016704
## 2i5_q_247_S165_R1_001    -0.799082075 -0.426051376 -0.86893494
## 2i6_q_247_d8_S166_R1_001 -0.687284120 -0.312494553 -0.63673485
## 2j1_q_250_S169_R1_001     0.089982056  0.291234820 -0.10870803
## 2j4_q_254_S172_R1_001     0.194270376 -0.420000714 -0.86858815
## 2j6_q_257_S174_R1_001    -0.657327055 -0.278145883 -0.85211410
## 2j7_q_259_d8_S175_R1_001 -0.106303604  0.152266749 -0.14496585
## 2k4_q_267_S180_R1_001     0.094417722 -0.018258118 -0.17030353
## 2l4_q_282_S188_R1_001     0.250933378  0.975855812  0.03085169
## 3a1_q_289_S193_R1_001     1.400470539 -0.067761685 -0.26256753
## 3a4_q_294_S196_R1_001    -0.239724605 -0.368452449 -0.86744076
## 3b5_q_305_S205_R1_001    -0.319437014  0.086747517 -0.38804262
## 3c2_q_312_S210_R1_001     0.012589829 -0.316039444 -0.46644314
## 3c5_q_317_S213_R1_001     1.226777048  0.011264463 -0.34394950
## 3c7_q_322_S215_R1_001     1.668555341  0.672577021  0.21954537
## 3d2_q_325_S218_R1_001    -0.666227694 -0.394499683 -0.81959434
## 3d6_q_333_S222_R1_001    -0.013669411 -0.358124352 -0.49915947
## 3e2_q_342_S226_R1_001    -0.045954700 -0.033187611 -0.49996014
## 3e3_q_343_S227_R1_001     0.485928482  0.555416493 -0.18016747
## 3e5_q_348_S229_R1_001     0.224930323 -0.293387936 -0.54351155
## 3f2_q_353_d8_S234_R1_001 -0.202576501 -0.228523424 -0.43499216
## 3f4_q_356_S236_R1_001    -0.827363518 -0.375055265 -0.78179055
## 3f5_q_358_S237_R1_001    -0.537655884 -0.395169232 -0.55933403
## 3f7_q_360_S239_R1_001    -0.191475754 -0.460926642 -0.90303976
## 3f8_q_360_d8_S240_R1_001 -0.825575218 -0.465522214 -0.85226167
## 3h1_q_372_S249_R1_001    -0.165114295 -0.129721856 -0.29416075
## 3h2_q_374_S250_R1_001    -0.893016236 -0.370498849 -0.85253780
## 3h3_q_375_d8_S251_R1_001 -0.902307327 -0.431184662 -0.99239465
## 3h5_q_377_S253_R1_001     1.256878568  0.486025322  0.11236429
## 3h8_q_387_d8_S256_R1_001 -0.228351211  0.448732680 -0.06301181
## 3i3_q_391_S259_R1_001     0.650287022  0.703084556  0.05047647
## 3i4_q_392_S260_R1_001    -0.016681100  0.161582518 -0.50008435
## 3j4_q_403_S268_R1_001    -0.944146291 -0.322606776 -0.74680498
## 3k1_q_411_S273_R1_001     0.215636521  1.123198209  0.14790358
## 3k6_q_417_S278_R1_001    -0.791323421 -0.449720527 -0.70275873
## 3k7_q_419_S279_R1_001    -0.478739079 -0.480384557 -0.83395146
## 3k8_q_420_S280_R1_001    -0.143910786 -0.266092041 -0.41753642
## 3l1_q_421_S281_R1_001     0.174104505 -0.104245722 -0.44243229
## 3l4_q_426_S284_R1_001    -0.824282026 -0.341787120 -0.75698953
## 3l6_q_428_S286_R1_001    -0.761535128 -0.309627067 -0.52866440
## 3l8_q_432_S288_R1_001    -0.715494321 -0.416918935 -0.58849734

Plotting the centroids to see how they behave: tidyverse version

library(glue)
# adding sample description to data
  data.sample<-kClustcentroids.any.trtlive.15 %>% as_tibble(rownames="sample") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse,by="sample") %>% 
  inner_join(cluster.any.trtlive.15.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
  data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>%  inner_join(
    data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>%  dplyr::slice(rep(1:1440)[!duplicated(.$group.cluster)]) 
# plot
p15.any.trtlive<-  ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") + 
  geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
  facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): fifteen clusters",color = "Cluster",y="scaled expression level") 
p15.any.trtlive

ggsave(p15.any.trtlive,file="../output/Twoafternoon.any.trtlive.DEG.Kmean.15clusters.png",width=11,height=15)

GO ORA of each cluster

# 8 Kmeans cluster
temp<-tibble(transcript_ID=cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID, cluster=kClusters.any.trtlive.8) %>% split(.$cluster) %>% map(function(x) {GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist=x$transcript_ID)})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1462 GO:0015760            5.623992e-07                1.0000000          3
## 359  GO:0006355            1.096760e-06                0.9999997         27
## 1449 GO:0015714            1.949991e-06                1.0000000          3
## 2143 GO:0035436            3.151415e-06                1.0000000          3
## 1448 GO:0015713            6.685127e-06                1.0000000          3
##      numInCat                                       term ontology
## 1462        5              glucose-6-phosphate transport       BP
## 359      2992 regulation of transcription, DNA-templated       BP
## 1449        7              phosphoenolpyruvate transport       BP
## 2143        8   triose phosphate transmembrane transport       BP
## 1448       10   phosphoglycerate transmembrane transport       BP
##      over_represented_padjust
## 1462              0.002077812
## 359               0.002077812
## 1449              0.002462839
## 2143              0.002985178
## 1448              0.005065989
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120            3.933902e-14                1.0000000         10
## 2253 GO:0042742            8.640276e-12                1.0000000         30
## 1166 GO:0010112            1.217158e-11                1.0000000          8
## 890  GO:0009611            3.002585e-10                1.0000000         21
## 638  GO:0006952            2.886080e-09                1.0000000         34
## 2750 GO:0050832            8.711643e-08                1.0000000         18
## 557  GO:0006749            2.512131e-07                1.0000000          8
## 853  GO:0009407            3.046529e-07                1.0000000          7
## 1210 GO:0010200            1.093472e-06                0.9999998         13
## 899  GO:0009626            1.204013e-06                0.9999999         10
## 894  GO:0009617            2.637069e-06                0.9999996         12
## 900  GO:0009627            3.095308e-06                0.9999997          8
## 3438 GO:1900056            3.275946e-06                0.9999999          5
## 2237 GO:0042538            9.409826e-06                0.9999990          8
## 1031 GO:0009863            2.617001e-05                0.9999987          5
## 933  GO:0009684            2.626258e-05                0.9999993          4
## 619  GO:0006887            3.813169e-05                0.9999961          7
## 485  GO:0006569            4.543349e-05                0.9999995          3
## 2986 GO:0062034            6.221247e-05                0.9999992          3
## 2400 GO:0044419            6.855688e-05                0.9999991          3
## 898  GO:0009625            8.048379e-05                0.9999926          6
## 3708 GO:2000022            1.235004e-04                0.9999877          6
## 2034 GO:0034052            1.235119e-04                0.9999978          3
## 972  GO:0009751            2.045166e-04                0.9999533         11
## 477  GO:0006561            2.669627e-04                0.9999933          3
## 932  GO:0009682            2.729335e-04                0.9999850          4
## 185  GO:0002229            3.590825e-04                0.9999460          7
## 646  GO:0006979            3.684017e-04                0.9998966         13
## 1001 GO:0009816            3.687152e-04                0.9999547          6
## 32   GO:0000162            3.720090e-04                0.9999777          4
## 2230 GO:0042372            4.057400e-04                0.9999876          3
##      numInCat                                                      term
## 1174       27                            camalexin biosynthetic process
## 2253      726                             defense response to bacterium
## 1166       22                regulation of systemic acquired resistance
## 890       419                                      response to wounding
## 638      1165                                          defense response
## 2750      469                                defense response to fungus
## 557        85                             glutathione metabolic process
## 853        67                                   toxin catabolic process
## 1210      286                                        response to chitin
## 899       140                        plant-type hypersensitive response
## 894       241                                     response to bacterium
## 900       104                              systemic acquired resistance
## 3438       26                    negative regulation of leaf senescence
## 2237      132                            hyperosmotic salinity response
## 1031       38                 salicylic acid mediated signaling pathway
## 933        17                    indoleacetic acid biosynthetic process
## 619        96                                                exocytosis
## 485         7                              tryptophan catabolic process
## 2986        8                     L-pipecolic acid biosynthetic process
## 2400        8                interspecies interaction between organisms
## 898        84                                        response to insect
## 3708       89    regulation of jasmonic acid mediated signaling pathway
## 2034       10 positive regulation of plant-type hypersensitive response
## 972       347                                response to salicylic acid
## 477        13                              proline biosynthetic process
## 932        33                               induced systemic resistance
## 185       126                             defense response to oomycetes
## 646       502                              response to oxidative stress
## 1001       97   defense response to bacterium, incompatible interaction
## 32         35                           tryptophan biosynthetic process
## 2230       18                        phylloquinone biosynthetic process
##      ontology over_represented_padjust
## 1174       BP             1.490555e-10
## 2253       BP             1.537270e-08
## 1166       BP             1.537270e-08
## 890        BP             2.844199e-07
## 638        BP             2.187072e-06
## 2750       BP             5.501403e-05
## 557        BP             1.359781e-04
## 853        BP             1.442912e-04
## 1210       BP             4.562004e-04
## 899        BP             4.562004e-04
## 894        BP             9.083503e-04
## 900        BP             9.548123e-04
## 3438       BP             9.548123e-04
## 2237       BP             2.546702e-03
## 1031       BP             6.219308e-03
## 933        BP             6.219308e-03
## 619        BP             8.498880e-03
## 485        BP             9.563750e-03
## 2986       BP             1.240648e-02
## 2400       BP             1.298810e-02
## 898        BP             1.452157e-02
## 3708       BP             2.034725e-02
## 2034       BP             2.034725e-02
## 972        BP             3.228805e-02
## 477        BP             3.977480e-02
## 932        BP             3.977480e-02
## 185        BP             4.698474e-02
## 646        BP             4.698474e-02
## 1001       BP             4.698474e-02
## 32         BP             4.698474e-02
## 2230       BP             4.959190e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200            1.343043e-10                1.0000000         22
## 3164 GO:0071732            1.889538e-10                1.0000000         11
## 3094 GO:0071281            1.613934e-09                1.0000000         12
## 359  GO:0006355            2.926398e-07                1.0000000         76
## 914  GO:0009644            3.813626e-07                1.0000000         11
## 3125 GO:0071456            4.645253e-07                1.0000000          7
## 3114 GO:0071369            2.507765e-06                0.9999998          8
## 638  GO:0006952            4.345368e-06                0.9999982         38
## 1166 GO:0010112            1.355092e-05                0.9999994          5
## 1039 GO:0009873            1.550994e-05                0.9999960         17
## 170  GO:0001944            2.120295e-05                0.9999985          6
## 853  GO:0009407            2.365588e-05                0.9999978          7
## 1919 GO:0032268            2.430717e-05                0.9999998          3
## 854  GO:0009408            3.627089e-05                0.9999909         15
## 3708 GO:2000022            3.991164e-05                0.9999950          8
## 2239 GO:0042542            8.277225e-05                0.9999865          9
## 1270 GO:0010286            8.336564e-05                0.9999883          8
## 646  GO:0006979            1.072674e-04                0.9999647         19
## 2795 GO:0051259            1.232705e-04                0.9999947          4
## 557  GO:0006749            1.465446e-04                0.9999812          7
## 2253 GO:0042742            2.068113e-04                0.9999181         24
## 3117 GO:0071398            2.323324e-04                0.9999950          3
##      numInCat                                                   term ontology
## 1210      286                                     response to chitin       BP
## 3164       52                      cellular response to nitric oxide       BP
## 3094       77                          cellular response to iron ion       BP
## 359      2992             regulation of transcription, DNA-templated       BP
## 914       109                       response to high light intensity       BP
## 3125       35                           cellular response to hypoxia       BP
## 3114       62                 cellular response to ethylene stimulus       BP
## 638      1165                                       defense response       BP
## 1166       22             regulation of systemic acquired resistance       BP
## 1039      364                   ethylene-activated signaling pathway       BP
## 170        41                                vasculature development       BP
## 853        67                                toxin catabolic process       BP
## 1919        5       regulation of cellular protein metabolic process       BP
## 854       305                                       response to heat       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
## 2239      130                          response to hydrogen peroxide       BP
## 1270       99                                       heat acclimation       BP
## 646       502                           response to oxidative stress       BP
## 2795       22                        protein complex oligomerization       BP
## 557        85                          glutathione metabolic process       BP
## 2253      726                          defense response to bacterium       BP
## 3117        9                        cellular response to fatty acid       BP
##      over_represented_padjust
## 1210             3.579730e-07
## 3164             3.579730e-07
## 3094             2.038399e-06
## 359              2.772030e-04
## 914              2.889966e-04
## 3125             2.933477e-04
## 3114             1.357418e-03
## 638              2.058075e-03
## 1166             5.704937e-03
## 1039             5.876717e-03
## 170              7.084606e-03
## 853              7.084606e-03
## 1919             7.084606e-03
## 854              9.816458e-03
## 3708             1.008168e-02
## 2239             1.858073e-02
## 1270             1.858073e-02
## 646              2.257979e-02
## 2795             2.458274e-02
## 557              2.776287e-02
## 2253             3.731466e-02
## 3117             4.001398e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816            1.135827e-05                0.9999990          7
## 890  GO:0009611            1.193363e-05                0.9999976         13
## 3251 GO:0080142            2.049805e-05                0.9999995          4
## 1205 GO:0010193            3.256854e-05                0.9999982          5
## 187  GO:0002237            5.567136e-05                0.9999952          6
## 638  GO:0006952            6.623245e-05                0.9999771         22
## 1210 GO:0010200            8.592369e-05                0.9999857          9
## 973  GO:0009753            1.046543e-04                0.9999800         10
##      numInCat                                                    term ontology
## 1001       97 defense response to bacterium, incompatible interaction       BP
## 890       419                                    response to wounding       BP
## 3251       19       regulation of salicylic acid biosynthetic process       BP
## 1205       54                                       response to ozone       BP
## 187        88                response to molecule of bacterial origin       BP
## 638      1165                                        defense response       BP
## 1210      286                                      response to chitin       BP
## 973       338                               response to jasmonic acid       BP
##      over_represented_padjust
## 1001               0.02260826
## 890                0.02260826
## 3251               0.02588904
## 1205               0.03085055
## 187                0.04182579
## 638                0.04182579
## 1210               0.04650927
## 973                0.04956691
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            8.757706e-16                1.0000000         30
## 3708 GO:2000022            7.994984e-11                1.0000000         10
## 423  GO:0006468            5.373869e-10                1.0000000         37
## 2750 GO:0050832            2.165212e-09                1.0000000         17
## 638  GO:0006952            8.977942e-09                1.0000000         28
## 1857 GO:0031347            6.083376e-08                1.0000000          8
## 26   GO:0000103            2.373158e-07                1.0000000          5
## 890  GO:0009611            7.356536e-07                0.9999999         14
## 1210 GO:0010200            3.530396e-06                0.9999996         10
## 1174 GO:0010120            2.304231e-05                0.9999993          4
## 642  GO:0006970            3.054236e-05                0.9999956          9
## 3009 GO:0070417            3.406753e-05                0.9999981          5
## 973  GO:0009753            3.498600e-05                0.9999942         10
## 1183 GO:0010150            8.085242e-05                0.9999885          8
## 1858 GO:0031348            9.938846e-05                0.9999931          5
## 3442 GO:1900067            1.076857e-04                0.9999998          2
## 2223 GO:0042344            1.092568e-04                0.9999979          3
## 972  GO:0009751            1.274416e-04                0.9999777          9
## 2917 GO:0055114            1.933095e-04                0.9999217         26
##      numInCat                                                   term ontology
## 2253      726                          defense response to bacterium       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
## 423      1484                                protein phosphorylation       BP
## 2750      469                             defense response to fungus       BP
## 638      1165                                       defense response       BP
## 1857       95                         regulation of defense response       BP
## 26         24                                   sulfate assimilation       BP
## 890       419                                   response to wounding       BP
## 1210      286                                     response to chitin       BP
## 1174       27                         camalexin biosynthetic process       BP
## 642       252                             response to osmotic stress       BP
## 3009       54                              cellular response to cold       BP
## 973       338                              response to jasmonic acid       BP
## 1183      219                                        leaf senescence       BP
## 1858       64                negative regulation of defense response       BP
## 3442        3         regulation of cellular response to alkaline pH       BP
## 2223       15                 indole glucosinolate catabolic process       BP
## 972       347                             response to salicylic acid       BP
## 2917     1923                            oxidation-reduction process       BP
##      over_represented_padjust
## 2253             3.318295e-12
## 3708             1.514650e-07
## 423              6.787196e-07
## 2750             2.050997e-06
## 638              6.803485e-06
## 1857             3.841652e-05
## 26               1.284556e-04
## 890              3.484239e-04
## 1210             1.486297e-03
## 1174             8.730730e-03
## 642              1.019707e-02
## 3009             1.019707e-02
## 973              1.019707e-02
## 1183             2.188213e-02
## 1858             2.435142e-02
## 3442             2.435142e-02
## 2223             2.435142e-02
## 972              2.682646e-02
## 2917             3.854999e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            5.357720e-16                1.0000000         10
## 1181 GO:0010143            9.648414e-08                1.0000000          5
## 514  GO:0006629            1.779627e-07                1.0000000          9
## 2917 GO:0055114            3.330532e-07                0.9999999         23
## 515  GO:0006631            1.472111e-05                0.9999993          5
## 953  GO:0009725            1.731199e-05                0.9999998          3
## 1531 GO:0016042            2.458469e-05                0.9999976          7
## 2136 GO:0035336            4.497929e-05                0.9999994          3
##      numInCat                                        term ontology
## 1291       41                suberin biosynthetic process       BP
## 1181       29                  cutin biosynthetic process       BP
## 514       220                     lipid metabolic process       BP
## 2917     1923                 oxidation-reduction process       BP
## 515        85                fatty acid metabolic process       BP
## 953        14                         response to hormone       BP
## 1531      229                     lipid catabolic process       BP
## 2136       17 long-chain fatty-acyl-CoA metabolic process       BP
##      over_represented_padjust
## 1291             2.030040e-12
## 1181             1.827892e-04
## 514              2.247668e-04
## 2917             3.154847e-04
## 515              1.093252e-02
## 953              1.093252e-02
## 1531             1.330734e-02
## 2136             2.130332e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441            1.046761e-06                1.0000000          3
## 646  GO:0006979            1.940648e-06                0.9999998         10
## 1292 GO:0010350            3.796980e-05                0.9999999          2
## 3096 GO:0071286            3.796980e-05                0.9999999          2
## 3104 GO:0071325            3.796980e-05                0.9999999          2
## 3197 GO:0072709            3.796980e-05                0.9999999          2
## 1838 GO:0031115            6.036162e-05                0.9999999          2
## 736  GO:0007568            8.378585e-05                0.9999965          4
## 1572 GO:0016310            9.069704e-05                0.9999832         10
## 2844 GO:0051592            9.983663e-05                0.9999997          2
## 1840 GO:0031117            1.039244e-04                0.9999996          2
## 3201 GO:0075733            1.069764e-04                0.9999996          2
## 3093 GO:0071280            1.177608e-04                0.9999995          2
## 2162 GO:0035865            1.203759e-04                0.9999995          2
## 1619 GO:0018008            1.219134e-04                0.9999995          2
## 1248 GO:0010248            1.574567e-04                0.9999993          2
## 452  GO:0006520            2.210690e-04                0.9999881          4
##      numInCat
## 1695        7
## 646       502
## 1292        4
## 3096        4
## 3104        4
## 3197        4
## 1838        5
## 736        85
## 1572      685
## 2844        6
## 1840        7
## 3201        7
## 3093        7
## 2162        7
## 1619        6
## 1248        6
## 452        90
##                                                                        term
## 1695                             tryptophan catabolic process to kynurenine
## 646                                            response to oxidative stress
## 1292                              cellular response to magnesium starvation
## 3096                                     cellular response to magnesium ion
## 3104                                 cellular response to mannitol stimulus
## 3197                                          cellular response to sorbitol
## 1838                      negative regulation of microtubule polymerization
## 736                                                                   aging
## 1572                                                        phosphorylation
## 2844                                                response to calcium ion
## 1840                    positive regulation of microtubule depolymerization
## 3201                                       intracellular transport of virus
## 3093                                        cellular response to copper ion
## 2162                                     cellular response to potassium ion
## 1619                           N-terminal peptidyl-glycine N-myristoylation
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 452                                   cellular amino acid metabolic process
##      ontology over_represented_padjust
## 1695       BP              0.003676557
## 646        BP              0.003676557
## 1292       BP              0.023977928
## 3096       BP              0.023977928
## 3104       BP              0.023977928
## 3197       BP              0.023977928
## 1838       BP              0.030795317
## 736        BP              0.030795317
## 1572       BP              0.030795317
## 2844       BP              0.030795317
## 1840       BP              0.030795317
## 3201       BP              0.030795317
## 3093       BP              0.030795317
## 2162       BP              0.030795317
## 1619       BP              0.030795317
## 1248       BP              0.037287711
## 452        BP              0.049272379
# convert list to data.frame
temp %>% enframe(name="cluster") %>% unnest(value) %>% write_csv(path="../output/twoafternoon.any.trtsoil.DEG.Kmeans.8cluster.csv")

# 15 Kmeans cluster
temp<-tibble(transcript_ID=cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID, cluster=kClusters.any.trtlive.15) %>% split(.$cluster) %>% map(function(x) {GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist=x$transcript_ID)})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 3114 GO:0071369            0.000000e+00                1.0000000          8
## 3164 GO:0071732            0.000000e+00                1.0000000          8
## 3094 GO:0071281            5.887219e-11                1.0000000          8
## 2907 GO:0055072            1.403391e-06                1.0000000          5
## 359  GO:0006355            9.616408e-06                0.9999972         22
## 1195 GO:0010167            1.116999e-05                0.9999997          4
## 613  GO:0006880            1.510212e-05                0.9999998          3
## 1760 GO:0030001            2.151165e-05                0.9999989          5
## 1764 GO:0030026            2.323359e-05                0.9999997          3
## 254  GO:0006096            5.543201e-05                0.9999966          5
## 612  GO:0006879            6.469221e-05                0.9999989          3
## 1114 GO:0010039            7.167439e-05                0.9999987          3
## 1952 GO:0032869            7.691956e-05                0.9999986          3
## 1462 GO:0015760            9.484983e-05                0.9999997          2
## 2487 GO:0045893            1.310402e-04                0.9999802          8
## 1798 GO:0030418            1.891972e-04                0.9999990          2
## 1449 GO:0015714            1.979116e-04                0.9999990          2
## 1438 GO:0015689            2.211728e-04                0.9999988          2
##      numInCat                                                term ontology
## 3114       62              cellular response to ethylene stimulus       BP
## 3164       52                   cellular response to nitric oxide       BP
## 3094       77                       cellular response to iron ion       BP
## 2907       69                                iron ion homeostasis       BP
## 359      2992          regulation of transcription, DNA-templated       BP
## 1195       53                                 response to nitrate       BP
## 613        20              intracellular sequestering of iron ion       BP
## 1760      130                                 metal ion transport       BP
## 1764       21                  cellular manganese ion homeostasis       BP
## 254       138                                  glycolytic process       BP
## 612        29                       cellular iron ion homeostasis       BP
## 1114       33                                response to iron ion       BP
## 1952       30               cellular response to insulin stimulus       BP
## 1462        5                       glucose-6-phosphate transport       BP
## 2487      570 positive regulation of transcription, DNA-templated       BP
## 1798        8                  nicotianamine biosynthetic process       BP
## 1449        7                       phosphoenolpyruvate transport       BP
## 1438        7                             molybdate ion transport       BP
##      over_represented_padjust
## 3114             0.000000e+00
## 3164             0.000000e+00
## 3094             7.435558e-08
## 2907             1.329362e-03
## 359              7.053851e-03
## 1195             7.053851e-03
## 613              8.174561e-03
## 1760             9.781343e-03
## 1764             9.781343e-03
## 254              2.100319e-02
## 612              2.228352e-02
## 1114             2.241909e-02
## 1952             2.241909e-02
## 1462             2.567043e-02
## 2487             3.310074e-02
## 1798             4.411100e-02
## 1449             4.411100e-02
## 1438             4.655688e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120            1.723122e-11                1.0000000          7
## 2253 GO:0042742            6.152850e-08                1.0000000         16
## 2750 GO:0050832            1.527669e-06                0.9999998         11
## 1031 GO:0009863            1.866709e-05                0.9999995          4
## 2230 GO:0042372            3.841027e-05                0.9999995          3
## 900  GO:0009627            5.003514e-05                0.9999970          5
## 894  GO:0009617            6.392564e-05                0.9999927          7
## 975  GO:0009759            6.961346e-05                0.9999988          3
## 855  GO:0009409            7.064222e-05                0.9999859         11
## 2790 GO:0051245            9.537243e-05                0.9999997          2
## 1166 GO:0010112            9.614152e-05                0.9999982          3
## 944  GO:0009697            1.070904e-04                0.9999979          3
## 1210 GO:0010200            1.072639e-04                0.9999867          7
## 3438 GO:1900056            1.520096e-04                0.9999966          3
## 2301 GO:0043069            1.719698e-04                0.9999959          3
## 1858 GO:0031348            1.811769e-04                0.9999908          4
## 557  GO:0006749            1.932567e-04                0.9999899          4
##      numInCat                                             term ontology
## 1174       27                   camalexin biosynthetic process       BP
## 2253      726                    defense response to bacterium       BP
## 2750      469                       defense response to fungus       BP
## 1031       38        salicylic acid mediated signaling pathway       BP
## 2230       18               phylloquinone biosynthetic process       BP
## 900       104                     systemic acquired resistance       BP
## 894       241                            response to bacterium       BP
## 975        19        indole glucosinolate biosynthetic process       BP
## 855       696                                 response to cold       BP
## 2790        4 negative regulation of cellular defense response       BP
## 1166       22       regulation of systemic acquired resistance       BP
## 944        21              salicylic acid biosynthetic process       BP
## 1210      286                               response to chitin       BP
## 3438       26           negative regulation of leaf senescence       BP
## 2301       30     negative regulation of programmed cell death       BP
## 1858       64          negative regulation of defense response       BP
## 557        85                    glutathione metabolic process       BP
##      over_represented_padjust
## 1174             6.528911e-08
## 2253             1.165657e-04
## 2750             1.929446e-03
## 1031             1.768240e-02
## 2230             2.910731e-02
## 900              2.974037e-02
## 894              2.974037e-02
## 975              2.974037e-02
## 855              2.974037e-02
## 2790             3.126329e-02
## 1166             3.126329e-02
## 944              3.126329e-02
## 1210             3.126329e-02
## 3438             4.114032e-02
## 2301             4.290496e-02
## 1858             4.290496e-02
## 557              4.307350e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816            9.333650e-07                0.9999999          7
## 3708 GO:2000022            1.751096e-06                0.9999999          6
## 894  GO:0009617            1.952047e-06                0.9999998          9
## 1857 GO:0031347            5.230519e-05                0.9999968          5
##      numInCat                                                    term ontology
## 1001       97 defense response to bacterium, incompatible interaction       BP
## 3708       89  regulation of jasmonic acid mediated signaling pathway       BP
## 894       241                                   response to bacterium       BP
## 1857       95                          regulation of defense response       BP
##      over_represented_padjust
## 1001              0.002465435
## 3708              0.002465435
## 894               0.002465435
## 1857              0.049546090
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            0.000000e+00                1.0000000          9
## 1181 GO:0010143            1.631724e-08                1.0000000          5
## 2917 GO:0055114            2.704739e-07                0.9999999         19
## 514  GO:0006629            2.154168e-06                0.9999999          7
## 515  GO:0006631            2.635121e-06                0.9999999          5
## 1531 GO:0016042            3.108001e-05                0.9999976          6
## 1527 GO:0016024            6.205455e-05                0.9999990          3
##      numInCat                                    term ontology
## 1291       41            suberin biosynthetic process       BP
## 1181       29              cutin biosynthetic process       BP
## 2917     1923             oxidation-reduction process       BP
## 514       220                 lipid metabolic process       BP
## 515        85            fatty acid metabolic process       BP
## 1531      229                 lipid catabolic process       BP
## 1527       29 CDP-diacylglycerol biosynthetic process       BP
##      over_represented_padjust
## 1291             0.000000e+00
## 1181             3.091302e-05
## 2917             3.416085e-04
## 514              1.996895e-03
## 515              1.996895e-03
## 1531             1.962703e-02
## 1527             3.358924e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1530 GO:0016036            3.257160e-06                0.9999999          5
## 3219 GO:0080040            1.654299e-05                1.0000000          2
##      numInCat                                                             term
## 1530      148                        cellular response to phosphate starvation
## 3219        5 positive regulation of cellular response to phosphate starvation
##      ontology over_represented_padjust
## 1530       BP               0.01234138
## 3219       BP               0.03134070
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            2.034123e-17                1.0000000         24
## 423  GO:0006468            1.807727e-12                1.0000000         29
## 1210 GO:0010200            7.269714e-08                1.0000000          9
## 972  GO:0009751            4.471802e-07                1.0000000          9
## 2750 GO:0050832            1.489589e-06                0.9999998         10
## 185  GO:0002229            1.667994e-05                0.9999988          6
## 638  GO:0006952            7.042662e-05                0.9999827         14
## 1174 GO:0010120            7.723694e-05                0.9999986          3
## 1858 GO:0031348            9.506968e-05                0.9999959          4
## 3708 GO:2000022            1.129933e-04                0.9999949          4
##      numInCat                                                   term ontology
## 2253      726                          defense response to bacterium       BP
## 423      1484                                protein phosphorylation       BP
## 1210      286                                     response to chitin       BP
## 972       347                             response to salicylic acid       BP
## 2750      469                             defense response to fungus       BP
## 185       126                          defense response to oomycetes       BP
## 638      1165                                       defense response       BP
## 1174       27                         camalexin biosynthetic process       BP
## 1858       64                negative regulation of defense response       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
##      over_represented_padjust
## 2253             7.707293e-14
## 423              3.424739e-09
## 1210             9.181649e-05
## 972              4.235915e-04
## 2750             1.128811e-03
## 185              1.053338e-02
## 638              3.658134e-02
## 1174             3.658134e-02
## 1858             4.002434e-02
## 3708             4.281315e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1737 GO:0019761            2.032233e-05                0.9999994          4
## 2997 GO:0070179            2.418116e-05                1.0000000          2
##      numInCat                               term ontology
## 1737       69 glucosinolate biosynthetic process       BP
## 2997        4      D-serine biosynthetic process       BP
##      over_represented_padjust
## 1737                0.0458112
## 2997                0.0458112
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200            4.498580e-11                1.0000000         19
## 914  GO:0009644            9.502988e-08                1.0000000         10
## 359  GO:0006355            1.299821e-07                1.0000000         58
## 3125 GO:0071456            8.872693e-07                1.0000000          6
## 3708 GO:2000022            2.292534e-06                0.9999998          8
## 2750 GO:0050832            3.085071e-06                0.9999993         17
## 638  GO:0006952            4.156627e-06                0.9999986         29
## 2239 GO:0042542            4.285154e-06                0.9999995          9
## 1919 GO:0032268            8.024532e-06                1.0000000          3
## 854  GO:0009408            8.921277e-06                0.9999983         13
## 58   GO:0000302            3.385541e-05                0.9999966          7
## 853  GO:0009407            3.481444e-05                0.9999973          6
## 2795 GO:0051259            3.977016e-05                0.9999987          4
## 170  GO:0001944            4.197116e-05                0.9999976          5
## 3465 GO:1900457            9.998220e-05                0.9999983          3
## 557  GO:0006749            1.469226e-04                0.9999849          6
##      numInCat                                                     term ontology
## 1210      286                                       response to chitin       BP
## 914       109                         response to high light intensity       BP
## 359      2992               regulation of transcription, DNA-templated       BP
## 3125       35                             cellular response to hypoxia       BP
## 3708       89   regulation of jasmonic acid mediated signaling pathway       BP
## 2750      469                               defense response to fungus       BP
## 638      1165                                         defense response       BP
## 2239      130                            response to hydrogen peroxide       BP
## 1919        5         regulation of cellular protein metabolic process       BP
## 854       305                                         response to heat       BP
## 58         96                      response to reactive oxygen species       BP
## 853        67                                  toxin catabolic process       BP
## 2795       22                          protein complex oligomerization       BP
## 170        41                                  vasculature development       BP
## 3465       10 regulation of brassinosteroid mediated signaling pathway       BP
## 557        85                            glutathione metabolic process       BP
##      over_represented_padjust
## 1210             1.704512e-07
## 914              1.641674e-04
## 359              1.641674e-04
## 3125             8.404659e-04
## 3708             1.737282e-03
## 2750             1.948222e-03
## 638              2.029556e-03
## 2239             2.029556e-03
## 1919             3.378328e-03
## 854              3.380272e-03
## 58               1.099266e-02
## 853              1.099266e-02
## 2795             1.135919e-02
## 170              1.135919e-02
## 3465             2.525550e-02
## 557              3.479310e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##      category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103            9.838045e-10                        1          5
##    numInCat                 term ontology over_represented_padjust
## 26       24 sulfate assimilation       BP             3.727635e-06
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441            9.629362e-08                1.0000000          3
## 1292 GO:0010350            9.046552e-06                1.0000000          2
## 3096 GO:0071286            9.046552e-06                1.0000000          2
## 3104 GO:0071325            9.046552e-06                1.0000000          2
## 3197 GO:0072709            9.046552e-06                1.0000000          2
## 1838 GO:0031115            1.460646e-05                1.0000000          2
## 859  GO:0009414            1.567843e-05                0.9999986          7
## 2844 GO:0051592            2.317204e-05                1.0000000          2
## 1619 GO:0018008            2.655419e-05                1.0000000          2
## 1840 GO:0031117            2.684154e-05                1.0000000          2
## 3201 GO:0075733            2.734060e-05                0.9999999          2
## 1248 GO:0010248            2.913482e-05                0.9999999          2
## 3093 GO:0071280            2.918774e-05                0.9999999          2
## 2162 GO:0035865            2.961950e-05                0.9999999          2
## 967  GO:0009744            3.347001e-05                0.9999989          4
## 646  GO:0006979            5.215913e-05                0.9999956          6
## 2829 GO:0051511            6.257254e-05                0.9999998          2
## 1282 GO:0010325            1.318124e-04                0.9999994          2
## 736  GO:0007568            1.789968e-04                0.9999956          3
## 3081 GO:0071219            1.826884e-04                0.9999990          2
## 441  GO:0006499            2.422844e-04                0.9999934          3
##      numInCat
## 1695        7
## 1292        4
## 3096        4
## 3104        4
## 3197        4
## 1838        5
## 859       596
## 2844        6
## 1619        6
## 1840        7
## 3201        7
## 1248        6
## 3093        7
## 2162        7
## 967       136
## 646       502
## 2829        9
## 1282       11
## 736        85
## 3081       14
## 441        89
##                                                                        term
## 1695                             tryptophan catabolic process to kynurenine
## 1292                              cellular response to magnesium starvation
## 3096                                     cellular response to magnesium ion
## 3104                                 cellular response to mannitol stimulus
## 3197                                          cellular response to sorbitol
## 1838                      negative regulation of microtubule polymerization
## 859                                           response to water deprivation
## 2844                                                response to calcium ion
## 1619                           N-terminal peptidyl-glycine N-myristoylation
## 1840                    positive regulation of microtubule depolymerization
## 3201                                       intracellular transport of virus
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 3093                                        cellular response to copper ion
## 2162                                     cellular response to potassium ion
## 967                                                     response to sucrose
## 646                                            response to oxidative stress
## 2829                      negative regulation of unidimensional cell growth
## 1282                  raffinose family oligosaccharide biosynthetic process
## 736                                                                   aging
## 3081                      cellular response to molecule of bacterial origin
## 441                                       N-terminal protein myristoylation
##      ontology over_represented_padjust
## 1695       BP             0.0003648565
## 1292       BP             0.0068554769
## 3096       BP             0.0068554769
## 3104       BP             0.0068554769
## 3197       BP             0.0068554769
## 1838       BP             0.0080163051
## 859        BP             0.0080163051
## 2844       BP             0.0080163051
## 1619       BP             0.0080163051
## 1840       BP             0.0080163051
## 3201       BP             0.0080163051
## 1248       BP             0.0080163051
## 3093       BP             0.0080163051
## 2162       BP             0.0080163051
## 967        BP             0.0084545246
## 646        BP             0.0123519337
## 2829       BP             0.0139463151
## 1282       BP             0.0277465042
## 736        BP             0.0346103103
## 3081       BP             0.0346103103
## 441        BP             0.0437150254
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 853 GO:0009407             7.21299e-06                0.9999998          4
##     numInCat                    term ontology over_represented_padjust
## 853       67 toxin catabolic process       BP               0.02733002
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1572 GO:0016310            4.546280e-06                0.9999993         12
## 2750 GO:0050832            9.987204e-06                0.9999988          9
## 423  GO:0006468            1.451836e-05                0.9999962         18
##      numInCat                       term ontology over_represented_padjust
## 1572      685            phosphorylation       BP               0.01722585
## 2750      469 defense response to fungus       BP               0.01833669
## 423      1484    protein phosphorylation       BP               0.01833669
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1166 GO:0010112            5.131427e-11                1.0000000          7
## 638  GO:0006952            1.205807e-09                1.0000000         28
## 890  GO:0009611            4.343094e-07                0.9999999         14
## 899  GO:0009626            4.232341e-06                0.9999996          8
## 973  GO:0009753            7.729912e-06                0.9999988         11
## 2253 GO:0042742            1.663149e-05                0.9999959         16
## 1205 GO:0010193            1.870619e-05                0.9999991          5
## 2237 GO:0042538            9.358914e-05                0.9999910          6
##      numInCat                                       term ontology
## 1166       22 regulation of systemic acquired resistance       BP
## 638      1165                           defense response       BP
## 890       419                       response to wounding       BP
## 899       140         plant-type hypersensitive response       BP
## 973       338                  response to jasmonic acid       BP
## 2253      726              defense response to bacterium       BP
## 1205       54                          response to ozone       BP
## 2237      132             hyperosmotic salinity response       BP
##      over_represented_padjust
## 1166             1.944298e-07
## 638              2.284402e-06
## 890              5.485327e-04
## 899              4.009085e-03
## 973              5.857727e-03
## 2253             1.012539e-02
## 1205             1.012539e-02
## 2237             4.432616e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 946 GO:0009699            5.793939e-06                0.9999999          4
## 890 GO:0009611            6.268525e-06                0.9999993          9
## 602 GO:0006855            7.426144e-06                0.9999996          6
##     numInCat                                 term ontology
## 946       34 phenylpropanoid biosynthetic process       BP
## 890      419                 response to wounding       BP
## 602      130         drug transmembrane transport       BP
##     over_represented_padjust
## 946               0.00937922
## 890               0.00937922
## 602               0.00937922
# convert list to data.frame
temp %>% enframe(name="cluster") %>% unnest(value) %>% write_csv(path="../output/twoafternoon.any.trtsoil.DEG.Kmeans.15cluster.csv")

Diurnal DEGS (3/4 days) clustering and cluster ORAs

Diurnal DEGs (13/14 days) clustering and cluster ORAs